INFO: Downloading File to /root/OFAKD-DARTS1/...

Succeed: Total num: 55, size: 170,785,025. OK num: 55(download 55 objects).

average speed 206013000(byte/s)

0.832415(s) elapsed
INFO: Downloading succeed.
Network is under initialization...
Network successfully initialized.
Training with a single process on 1 GPUs.
Data processing configuration for current model + dataset:
	input_size: (3, 32, 32)
	interpolation: bilinear
	mean: (0.49139968, 0.48215827, 0.44653124)
	std: (0.24703233, 0.24348505, 0.26158768)
	crop_pct: 1.0
	crop_mode: center

-------------------------------
Learnable parameters
Student: 1.93M
Extra: 0.00M
-------------------------------
Scheduled epochs: 50
p_max: 0.125
search_space = s5
Using downloaded and verified file: /mnt/OFAKD-DARTS1/data/cifar-10-python.tar.gz
Extracting /mnt/OFAKD-DARTS1/data/cifar-10-python.tar.gz to /mnt/OFAKD-DARTS1/data
Train: 0 [   0/390]  Loss: 2.338 (2.34)  Acc@1: 10.9375 (10.9375)  Acc@5: 56.2500 (56.2500)LR: 2.500e-02
Train: 0 [  50/390]  Loss: 1.606 (2.00)  Acc@1: 43.7500 (27.2059)  Acc@5: 92.1875 (79.7181)LR: 2.500e-02
Train: 0 [ 100/390]  Loss: 1.650 (1.89)  Acc@1: 40.6250 (30.7395)  Acc@5: 89.0625 (83.2766)LR: 2.500e-02
Train: 0 [ 150/390]  Loss: 1.447 (1.80)  Acc@1: 53.1250 (33.7748)  Acc@5: 92.1875 (85.4408)LR: 2.500e-02
Train: 0 [ 200/390]  Loss: 1.152 (1.74)  Acc@1: 56.2500 (36.1007)  Acc@5: 93.7500 (86.8626)LR: 2.500e-02
Train: 0 [ 250/390]  Loss: 1.380 (1.68)  Acc@1: 51.5625 (38.2097)  Acc@5: 92.1875 (87.9669)LR: 2.500e-02
Train: 0 [ 300/390]  Loss: 1.160 (1.63)  Acc@1: 51.5625 (39.9917)  Acc@5: 98.4375 (88.8652)LR: 2.500e-02
Train: 0 [ 350/390]  Loss: 1.306 (1.59)  Acc@1: 43.7500 (41.5331)  Acc@5: 98.4375 (89.4676)LR: 2.500e-02
Train: 0 [ 390/390]  Loss: 1.435 (1.56)  Acc@1: 50.0000 (42.6960)  Acc@5: 95.0000 (89.9640)LR: 2.500e-02
train_acc 42.696000
Valid: 0 [   0/390]  Loss: 1.576 (1.58)  Acc@1: 39.0625 (39.0625)  Acc@5: 90.6250 (90.6250)
Valid: 0 [  50/390]  Loss: 1.305 (1.31)  Acc@1: 57.8125 (53.6152)  Acc@5: 90.6250 (94.0564)
Valid: 0 [ 100/390]  Loss: 1.328 (1.32)  Acc@1: 39.0625 (52.6764)  Acc@5: 92.1875 (93.8583)
Valid: 0 [ 150/390]  Loss: 1.580 (1.31)  Acc@1: 43.7500 (52.4524)  Acc@5: 85.9375 (93.9259)
Valid: 0 [ 200/390]  Loss: 1.244 (1.31)  Acc@1: 53.1250 (52.4021)  Acc@5: 96.8750 (94.0299)
Valid: 0 [ 250/390]  Loss: 1.745 (1.30)  Acc@1: 37.5000 (52.6394)  Acc@5: 82.8125 (93.8621)
Valid: 0 [ 300/390]  Loss: 1.202 (1.31)  Acc@1: 56.2500 (52.7253)  Acc@5: 98.4375 (93.8279)
Valid: 0 [ 350/390]  Loss: 1.329 (1.31)  Acc@1: 56.2500 (52.5953)  Acc@5: 100.0000 (93.7990)
Valid: 0 [ 390/390]  Loss: 1.381 (1.31)  Acc@1: 52.5000 (52.6800)  Acc@5: 95.0000 (93.7640)
valid_acc 52.680000
epoch = 0   
 genotype = Genotype(normal=[('sep_conv_5x5', 0), ('sep_conv_5x5', 1), ('sep_conv_5x5', 2), ('sep_conv_3x3', 1), ('sep_conv_5x5', 3), ('sep_conv_5x5', 1), ('dil_conv_5x5', 4), ('dil_conv_5x5', 2)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('sep_conv_5x5', 1), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('max_pool_3x3', 0), ('dil_conv_5x5', 2)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1254, 0.1259, 0.1195, 0.1238, 0.1260, 0.1284, 0.1272, 0.1239],
        [0.1285, 0.1230, 0.1191, 0.1213, 0.1264, 0.1282, 0.1280, 0.1255],
        [0.1261, 0.1260, 0.1185, 0.1216, 0.1264, 0.1273, 0.1276, 0.1264],
        [0.1278, 0.1231, 0.1192, 0.1214, 0.1286, 0.1270, 0.1259, 0.1271],
        [0.1289, 0.1212, 0.1181, 0.1218, 0.1270, 0.1296, 0.1259, 0.1275],
        [0.1266, 0.1248, 0.1190, 0.1215, 0.1263, 0.1264, 0.1280, 0.1274],
        [0.1287, 0.1221, 0.1186, 0.1207, 0.1282, 0.1282, 0.1259, 0.1275],
        [0.1289, 0.1216, 0.1185, 0.1219, 0.1267, 0.1276, 0.1273, 0.1275],
        [0.1305, 0.1207, 0.1185, 0.1208, 0.1275, 0.1283, 0.1264, 0.1272],
        [0.1282, 0.1244, 0.1194, 0.1220, 0.1268, 0.1275, 0.1257, 0.1259],
        [0.1293, 0.1216, 0.1190, 0.1206, 0.1279, 0.1283, 0.1262, 0.1271],
        [0.1306, 0.1199, 0.1178, 0.1213, 0.1270, 0.1286, 0.1257, 0.1292],
        [0.1306, 0.1193, 0.1182, 0.1205, 0.1280, 0.1285, 0.1279, 0.1268],
        [0.1314, 0.1186, 0.1173, 0.1189, 0.1289, 0.1286, 0.1271, 0.1292]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1233, 0.1278, 0.1251, 0.1258, 0.1251, 0.1243, 0.1247, 0.1239],
        [0.1262, 0.1255, 0.1235, 0.1252, 0.1237, 0.1265, 0.1253, 0.1240],
        [0.1245, 0.1269, 0.1241, 0.1258, 0.1241, 0.1258, 0.1239, 0.1249],
        [0.1244, 0.1242, 0.1220, 0.1245, 0.1255, 0.1277, 0.1264, 0.1252],
        [0.1263, 0.1236, 0.1208, 0.1244, 0.1242, 0.1255, 0.1272, 0.1279],
        [0.1235, 0.1271, 0.1242, 0.1247, 0.1257, 0.1254, 0.1256, 0.1239],
        [0.1257, 0.1259, 0.1237, 0.1249, 0.1244, 0.1269, 0.1254, 0.1231],
        [0.1253, 0.1240, 0.1205, 0.1230, 0.1259, 0.1268, 0.1271, 0.1273],
        [0.1254, 0.1251, 0.1221, 0.1257, 0.1269, 0.1244, 0.1259, 0.1247],
        [0.1247, 0.1278, 0.1247, 0.1243, 0.1238, 0.1258, 0.1239, 0.1249],
        [0.1257, 0.1256, 0.1233, 0.1250, 0.1255, 0.1241, 0.1256, 0.1252],
        [0.1248, 0.1245, 0.1217, 0.1244, 0.1266, 0.1260, 0.1252, 0.1267],
        [0.1258, 0.1252, 0.1221, 0.1250, 0.1247, 0.1257, 0.1257, 0.1257],
        [0.1260, 0.1243, 0.1220, 0.1252, 0.1262, 0.1266, 0.1250, 0.1247]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 1 [   0/390]  Loss: 1.081 (1.08)  Acc@1: 67.1875 (67.1875)  Acc@5: 93.7500 (93.7500)LR: 2.498e-02
Train: 1 [  50/390]  Loss: 1.235 (1.28)  Acc@1: 53.1250 (53.2475)  Acc@5: 95.3125 (93.7500)LR: 2.498e-02
Train: 1 [ 100/390]  Loss: 0.9597 (1.25)  Acc@1: 65.6250 (54.0996)  Acc@5: 98.4375 (94.5235)LR: 2.498e-02
Train: 1 [ 150/390]  Loss: 1.220 (1.23)  Acc@1: 46.8750 (55.1635)  Acc@5: 92.1875 (94.5985)LR: 2.498e-02
Train: 1 [ 200/390]  Loss: 1.228 (1.21)  Acc@1: 60.9375 (56.2034)  Acc@5: 93.7500 (94.7606)LR: 2.498e-02
Train: 1 [ 250/390]  Loss: 1.068 (1.19)  Acc@1: 60.9375 (56.9534)  Acc@5: 100.0000 (94.9265)LR: 2.498e-02
Train: 1 [ 300/390]  Loss: 0.8881 (1.17)  Acc@1: 68.7500 (57.5997)  Acc@5: 96.8750 (95.1464)LR: 2.498e-02
Train: 1 [ 350/390]  Loss: 0.8436 (1.16)  Acc@1: 64.0625 (58.3511)  Acc@5: 98.4375 (95.2413)LR: 2.498e-02
Train: 1 [ 390/390]  Loss: 1.367 (1.14)  Acc@1: 50.0000 (58.8600)  Acc@5: 95.0000 (95.3760)LR: 2.498e-02
train_acc 58.860000
Valid: 1 [   0/390]  Loss: 1.350 (1.35)  Acc@1: 53.1250 (53.1250)  Acc@5: 92.1875 (92.1875)
Valid: 1 [  50/390]  Loss: 1.475 (1.36)  Acc@1: 45.3125 (53.9216)  Acc@5: 92.1875 (93.8419)
Valid: 1 [ 100/390]  Loss: 1.532 (1.36)  Acc@1: 39.0625 (53.5736)  Acc@5: 90.6250 (93.9821)
Valid: 1 [ 150/390]  Loss: 1.238 (1.36)  Acc@1: 53.1250 (53.7562)  Acc@5: 96.8750 (94.0087)
Valid: 1 [ 200/390]  Loss: 1.562 (1.36)  Acc@1: 48.4375 (53.9179)  Acc@5: 92.1875 (93.9366)
Valid: 1 [ 250/390]  Loss: 1.260 (1.36)  Acc@1: 56.2500 (53.9343)  Acc@5: 98.4375 (93.9430)
Valid: 1 [ 300/390]  Loss: 1.566 (1.36)  Acc@1: 56.2500 (53.9192)  Acc@5: 90.6250 (94.0667)
Valid: 1 [ 350/390]  Loss: 1.484 (1.36)  Acc@1: 51.5625 (53.8907)  Acc@5: 92.1875 (94.1106)
Valid: 1 [ 390/390]  Loss: 1.347 (1.36)  Acc@1: 62.5000 (53.9640)  Acc@5: 90.0000 (94.1200)
valid_acc 53.964000
epoch = 1   
 genotype = Genotype(normal=[('sep_conv_5x5', 0), ('sep_conv_5x5', 1), ('dil_conv_5x5', 2), ('sep_conv_3x3', 1), ('sep_conv_5x5', 3), ('sep_conv_3x3', 1), ('dil_conv_5x5', 4), ('sep_conv_5x5', 2)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('sep_conv_5x5', 1), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('max_pool_3x3', 0)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1251, 0.1265, 0.1145, 0.1216, 0.1263, 0.1314, 0.1283, 0.1263],
        [0.1308, 0.1214, 0.1139, 0.1179, 0.1275, 0.1310, 0.1290, 0.1285],
        [0.1270, 0.1264, 0.1131, 0.1186, 0.1265, 0.1302, 0.1300, 0.1284],
        [0.1288, 0.1222, 0.1144, 0.1188, 0.1311, 0.1297, 0.1269, 0.1280],
        [0.1311, 0.1196, 0.1128, 0.1197, 0.1278, 0.1308, 0.1257, 0.1325],
        [0.1283, 0.1254, 0.1137, 0.1182, 0.1284, 0.1294, 0.1270, 0.1297],
        [0.1303, 0.1215, 0.1140, 0.1176, 0.1308, 0.1307, 0.1260, 0.1291],
        [0.1307, 0.1207, 0.1135, 0.1189, 0.1294, 0.1293, 0.1284, 0.1291],
        [0.1323, 0.1195, 0.1141, 0.1183, 0.1287, 0.1326, 0.1268, 0.1277],
        [0.1315, 0.1240, 0.1142, 0.1191, 0.1288, 0.1286, 0.1261, 0.1277],
        [0.1317, 0.1190, 0.1134, 0.1167, 0.1323, 0.1298, 0.1270, 0.1300],
        [0.1345, 0.1170, 0.1117, 0.1177, 0.1298, 0.1326, 0.1244, 0.1324],
        [0.1343, 0.1161, 0.1129, 0.1171, 0.1302, 0.1316, 0.1287, 0.1290],
        [0.1347, 0.1142, 0.1109, 0.1134, 0.1307, 0.1328, 0.1295, 0.1339]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1228, 0.1290, 0.1239, 0.1240, 0.1259, 0.1254, 0.1247, 0.1244],
        [0.1272, 0.1257, 0.1217, 0.1254, 0.1233, 0.1276, 0.1241, 0.1250],
        [0.1252, 0.1286, 0.1231, 0.1251, 0.1231, 0.1264, 0.1242, 0.1243],
        [0.1255, 0.1250, 0.1204, 0.1246, 0.1245, 0.1292, 0.1269, 0.1239],
        [0.1259, 0.1245, 0.1177, 0.1234, 0.1234, 0.1260, 0.1290, 0.1302],
        [0.1242, 0.1288, 0.1229, 0.1235, 0.1260, 0.1263, 0.1238, 0.1246],
        [0.1254, 0.1266, 0.1217, 0.1242, 0.1254, 0.1276, 0.1258, 0.1231],
        [0.1243, 0.1247, 0.1165, 0.1205, 0.1271, 0.1286, 0.1281, 0.1302],
        [0.1257, 0.1241, 0.1182, 0.1238, 0.1279, 0.1257, 0.1275, 0.1271],
        [0.1248, 0.1289, 0.1231, 0.1234, 0.1244, 0.1267, 0.1250, 0.1237],
        [0.1261, 0.1254, 0.1209, 0.1252, 0.1263, 0.1249, 0.1280, 0.1233],
        [0.1243, 0.1247, 0.1186, 0.1229, 0.1269, 0.1282, 0.1247, 0.1296],
        [0.1264, 0.1251, 0.1190, 0.1236, 0.1262, 0.1257, 0.1261, 0.1280],
        [0.1258, 0.1252, 0.1192, 0.1241, 0.1260, 0.1271, 0.1270, 0.1256]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 2 [   0/390]  Loss: 1.042 (1.04)  Acc@1: 65.6250 (65.6250)  Acc@5: 95.3125 (95.3125)LR: 2.491e-02
Train: 2 [  50/390]  Loss: 0.8440 (0.992)  Acc@1: 70.3125 (65.1961)  Acc@5: 98.4375 (96.9363)LR: 2.491e-02
Train: 2 [ 100/390]  Loss: 1.121 (1.01)  Acc@1: 68.7500 (64.2946)  Acc@5: 93.7500 (96.8131)LR: 2.491e-02
Train: 2 [ 150/390]  Loss: 0.9094 (0.987)  Acc@1: 68.7500 (65.3249)  Acc@5: 95.3125 (96.8440)LR: 2.491e-02
Train: 2 [ 200/390]  Loss: 0.9385 (0.988)  Acc@1: 71.8750 (65.2674)  Acc@5: 93.7500 (96.7584)LR: 2.491e-02
Train: 2 [ 250/390]  Loss: 0.8648 (0.979)  Acc@1: 71.8750 (65.5503)  Acc@5: 100.0000 (96.7754)LR: 2.491e-02
Train: 2 [ 300/390]  Loss: 1.253 (0.975)  Acc@1: 56.2500 (65.5575)  Acc@5: 92.1875 (96.8335)LR: 2.491e-02
Train: 2 [ 350/390]  Loss: 0.9133 (0.969)  Acc@1: 68.7500 (65.6384)  Acc@5: 96.8750 (96.8795)LR: 2.491e-02
Train: 2 [ 390/390]  Loss: 0.6853 (0.961)  Acc@1: 77.5000 (65.9760)  Acc@5: 100.0000 (96.9560)LR: 2.491e-02
train_acc 65.976000
Valid: 2 [   0/390]  Loss: 1.119 (1.12)  Acc@1: 59.3750 (59.3750)  Acc@5: 92.1875 (92.1875)
Valid: 2 [  50/390]  Loss: 0.7435 (0.913)  Acc@1: 71.8750 (66.4828)  Acc@5: 100.0000 (97.1201)
Valid: 2 [ 100/390]  Loss: 1.266 (0.906)  Acc@1: 53.1250 (67.4350)  Acc@5: 95.3125 (97.1844)
Valid: 2 [ 150/390]  Loss: 0.7762 (0.898)  Acc@1: 70.3125 (68.2016)  Acc@5: 100.0000 (97.2579)
Valid: 2 [ 200/390]  Loss: 0.6487 (0.900)  Acc@1: 78.1250 (68.0115)  Acc@5: 98.4375 (97.2792)
Valid: 2 [ 250/390]  Loss: 0.7841 (0.904)  Acc@1: 70.3125 (67.9968)  Acc@5: 96.8750 (97.3668)
Valid: 2 [ 300/390]  Loss: 1.282 (0.904)  Acc@1: 60.9375 (68.0388)  Acc@5: 89.0625 (97.2695)
Valid: 2 [ 350/390]  Loss: 0.9797 (0.904)  Acc@1: 54.6875 (68.0467)  Acc@5: 98.4375 (97.2712)
Valid: 2 [ 390/390]  Loss: 0.8618 (0.906)  Acc@1: 65.0000 (68.0720)  Acc@5: 100.0000 (97.2320)
valid_acc 68.072000
epoch = 2   
 genotype = Genotype(normal=[('sep_conv_5x5', 0), ('sep_conv_5x5', 1), ('dil_conv_5x5', 2), ('sep_conv_3x3', 1), ('sep_conv_5x5', 3), ('sep_conv_5x5', 1), ('dil_conv_5x5', 4), ('sep_conv_3x3', 1)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('sep_conv_5x5', 1), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_5x5', 2), ('dil_conv_3x3', 3), ('dil_conv_5x5', 2), ('dil_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1249, 0.1270, 0.1111, 0.1209, 0.1286, 0.1343, 0.1275, 0.1258],
        [0.1326, 0.1197, 0.1098, 0.1159, 0.1292, 0.1338, 0.1297, 0.1294],
        [0.1268, 0.1275, 0.1100, 0.1177, 0.1262, 0.1314, 0.1322, 0.1282],
        [0.1297, 0.1214, 0.1107, 0.1172, 0.1334, 0.1318, 0.1274, 0.1284],
        [0.1333, 0.1187, 0.1095, 0.1197, 0.1282, 0.1308, 0.1261, 0.1336],
        [0.1286, 0.1252, 0.1101, 0.1158, 0.1292, 0.1324, 0.1275, 0.1312],
        [0.1314, 0.1201, 0.1101, 0.1153, 0.1323, 0.1341, 0.1268, 0.1299],
        [0.1321, 0.1194, 0.1096, 0.1172, 0.1304, 0.1319, 0.1291, 0.1302],
        [0.1353, 0.1173, 0.1101, 0.1166, 0.1293, 0.1355, 0.1275, 0.1284],
        [0.1331, 0.1241, 0.1114, 0.1181, 0.1308, 0.1288, 0.1266, 0.1272],
        [0.1328, 0.1171, 0.1091, 0.1142, 0.1358, 0.1323, 0.1279, 0.1307],
        [0.1384, 0.1148, 0.1077, 0.1166, 0.1303, 0.1333, 0.1251, 0.1338],
        [0.1373, 0.1128, 0.1081, 0.1138, 0.1324, 0.1351, 0.1299, 0.1305],
        [0.1379, 0.1107, 0.1061, 0.1096, 0.1340, 0.1357, 0.1291, 0.1368]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1233, 0.1304, 0.1221, 0.1234, 0.1265, 0.1257, 0.1234, 0.1251],
        [0.1275, 0.1267, 0.1209, 0.1246, 0.1241, 0.1292, 0.1228, 0.1242],
        [0.1250, 0.1292, 0.1207, 0.1237, 0.1227, 0.1290, 0.1249, 0.1247],
        [0.1264, 0.1256, 0.1197, 0.1240, 0.1237, 0.1306, 0.1274, 0.1226],
        [0.1270, 0.1245, 0.1139, 0.1220, 0.1240, 0.1272, 0.1300, 0.1315],
        [0.1246, 0.1288, 0.1197, 0.1232, 0.1275, 0.1266, 0.1245, 0.1250],
        [0.1235, 0.1269, 0.1201, 0.1232, 0.1271, 0.1298, 0.1257, 0.1238],
        [0.1252, 0.1250, 0.1123, 0.1185, 0.1287, 0.1306, 0.1289, 0.1308],
        [0.1266, 0.1234, 0.1133, 0.1209, 0.1294, 0.1275, 0.1298, 0.1290],
        [0.1260, 0.1289, 0.1200, 0.1225, 0.1235, 0.1294, 0.1256, 0.1241],
        [0.1260, 0.1270, 0.1206, 0.1248, 0.1264, 0.1261, 0.1275, 0.1216],
        [0.1238, 0.1250, 0.1145, 0.1209, 0.1268, 0.1313, 0.1250, 0.1327],
        [0.1269, 0.1251, 0.1151, 0.1211, 0.1280, 0.1277, 0.1254, 0.1307],
        [0.1241, 0.1256, 0.1160, 0.1231, 0.1249, 0.1288, 0.1286, 0.1289]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 3 [   0/390]  Loss: 0.6379 (0.638)  Acc@1: 76.5625 (76.5625)  Acc@5: 100.0000 (100.0000)LR: 2.479e-02
Train: 3 [  50/390]  Loss: 0.6567 (0.831)  Acc@1: 75.0000 (69.7917)  Acc@5: 98.4375 (98.0086)LR: 2.479e-02
Train: 3 [ 100/390]  Loss: 0.8355 (0.830)  Acc@1: 73.4375 (70.1423)  Acc@5: 98.4375 (97.7104)LR: 2.479e-02
Train: 3 [ 150/390]  Loss: 1.123 (0.844)  Acc@1: 64.0625 (69.8365)  Acc@5: 93.7500 (97.7856)LR: 2.479e-02
Train: 3 [ 200/390]  Loss: 0.7333 (0.843)  Acc@1: 79.6875 (70.0093)  Acc@5: 96.8750 (97.7223)LR: 2.479e-02
Train: 3 [ 250/390]  Loss: 0.7082 (0.841)  Acc@1: 71.8750 (70.1257)  Acc@5: 98.4375 (97.7154)LR: 2.479e-02
Train: 3 [ 300/390]  Loss: 0.7897 (0.842)  Acc@1: 75.0000 (70.1100)  Acc@5: 100.0000 (97.7834)LR: 2.479e-02
Train: 3 [ 350/390]  Loss: 0.9485 (0.841)  Acc@1: 64.0625 (70.2769)  Acc@5: 100.0000 (97.7920)LR: 2.479e-02
Train: 3 [ 390/390]  Loss: 0.9293 (0.835)  Acc@1: 65.0000 (70.4560)  Acc@5: 95.0000 (97.7640)LR: 2.479e-02
train_acc 70.456000
Valid: 3 [   0/390]  Loss: 0.9147 (0.915)  Acc@1: 68.7500 (68.7500)  Acc@5: 98.4375 (98.4375)
Valid: 3 [  50/390]  Loss: 1.053 (0.825)  Acc@1: 62.5000 (72.2120)  Acc@5: 95.3125 (96.9669)
Valid: 3 [ 100/390]  Loss: 0.8805 (0.828)  Acc@1: 75.0000 (72.0606)  Acc@5: 100.0000 (97.1380)
Valid: 3 [ 150/390]  Loss: 0.7359 (0.838)  Acc@1: 78.1250 (71.5128)  Acc@5: 98.4375 (97.0923)
Valid: 3 [ 200/390]  Loss: 0.7203 (0.840)  Acc@1: 76.5625 (71.1754)  Acc@5: 96.8750 (97.1160)
Valid: 3 [ 250/390]  Loss: 0.7980 (0.842)  Acc@1: 78.1250 (71.3210)  Acc@5: 96.8750 (97.1676)
Valid: 3 [ 300/390]  Loss: 0.7959 (0.845)  Acc@1: 75.0000 (71.3767)  Acc@5: 100.0000 (97.0723)
Valid: 3 [ 350/390]  Loss: 0.9860 (0.841)  Acc@1: 60.9375 (71.4076)  Acc@5: 100.0000 (97.1688)
Valid: 3 [ 390/390]  Loss: 0.8986 (0.842)  Acc@1: 67.5000 (71.3880)  Acc@5: 95.0000 (97.1640)
valid_acc 71.388000
epoch = 3   
 genotype = Genotype(normal=[('sep_conv_5x5', 0), ('sep_conv_5x5', 1), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('sep_conv_5x5', 1), ('sep_conv_5x5', 3), ('sep_conv_3x3', 1), ('sep_conv_5x5', 4)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('sep_conv_5x5', 1), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_3x3', 3), ('dil_conv_5x5', 2), ('dil_conv_5x5', 2), ('dil_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1254, 0.1257, 0.1069, 0.1191, 0.1290, 0.1369, 0.1297, 0.1274],
        [0.1347, 0.1167, 0.1054, 0.1132, 0.1321, 0.1343, 0.1316, 0.1320],
        [0.1282, 0.1267, 0.1062, 0.1160, 0.1269, 0.1331, 0.1331, 0.1298],
        [0.1303, 0.1190, 0.1065, 0.1147, 0.1348, 0.1354, 0.1298, 0.1295],
        [0.1371, 0.1154, 0.1047, 0.1180, 0.1301, 0.1325, 0.1266, 0.1357],
        [0.1305, 0.1239, 0.1060, 0.1136, 0.1308, 0.1357, 0.1270, 0.1324],
        [0.1341, 0.1168, 0.1055, 0.1124, 0.1341, 0.1387, 0.1285, 0.1299],
        [0.1346, 0.1161, 0.1047, 0.1145, 0.1325, 0.1335, 0.1319, 0.1322],
        [0.1389, 0.1133, 0.1050, 0.1126, 0.1315, 0.1376, 0.1297, 0.1313],
        [0.1364, 0.1224, 0.1075, 0.1164, 0.1324, 0.1308, 0.1268, 0.1273],
        [0.1342, 0.1145, 0.1050, 0.1120, 0.1397, 0.1324, 0.1287, 0.1334],
        [0.1432, 0.1112, 0.1033, 0.1148, 0.1322, 0.1343, 0.1252, 0.1357],
        [0.1425, 0.1091, 0.1035, 0.1110, 0.1350, 0.1362, 0.1317, 0.1309],
        [0.1423, 0.1067, 0.1013, 0.1061, 0.1360, 0.1397, 0.1288, 0.1392]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1226, 0.1321, 0.1213, 0.1220, 0.1266, 0.1267, 0.1233, 0.1255],
        [0.1286, 0.1266, 0.1189, 0.1266, 0.1234, 0.1298, 0.1217, 0.1244],
        [0.1244, 0.1297, 0.1195, 0.1229, 0.1222, 0.1296, 0.1257, 0.1261],
        [0.1270, 0.1260, 0.1187, 0.1222, 0.1242, 0.1329, 0.1267, 0.1223],
        [0.1281, 0.1243, 0.1107, 0.1211, 0.1233, 0.1280, 0.1303, 0.1342],
        [0.1234, 0.1299, 0.1186, 0.1243, 0.1274, 0.1267, 0.1240, 0.1259],
        [0.1236, 0.1267, 0.1178, 0.1239, 0.1278, 0.1308, 0.1249, 0.1245],
        [0.1263, 0.1246, 0.1083, 0.1177, 0.1293, 0.1320, 0.1292, 0.1326],
        [0.1278, 0.1216, 0.1095, 0.1195, 0.1300, 0.1277, 0.1329, 0.1310],
        [0.1253, 0.1300, 0.1193, 0.1221, 0.1221, 0.1302, 0.1265, 0.1245],
        [0.1263, 0.1279, 0.1202, 0.1246, 0.1254, 0.1267, 0.1291, 0.1199],
        [0.1232, 0.1246, 0.1114, 0.1203, 0.1285, 0.1325, 0.1254, 0.1342],
        [0.1276, 0.1237, 0.1122, 0.1203, 0.1300, 0.1272, 0.1260, 0.1330],
        [0.1235, 0.1248, 0.1133, 0.1223, 0.1251, 0.1298, 0.1308, 0.1304]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 4 [   0/390]  Loss: 0.5467 (0.547)  Acc@1: 85.9375 (85.9375)  Acc@5: 100.0000 (100.0000)LR: 2.462e-02
Train: 4 [  50/390]  Loss: 0.7826 (0.733)  Acc@1: 68.7500 (74.1115)  Acc@5: 98.4375 (98.3456)LR: 2.462e-02
Train: 4 [ 100/390]  Loss: 0.7968 (0.749)  Acc@1: 68.7500 (73.2209)  Acc@5: 98.4375 (98.2054)LR: 2.462e-02
Train: 4 [ 150/390]  Loss: 0.4824 (0.758)  Acc@1: 81.2500 (72.9822)  Acc@5: 100.0000 (98.2099)LR: 2.462e-02
Train: 4 [ 200/390]  Loss: 0.8528 (0.760)  Acc@1: 71.8750 (73.1965)  Acc@5: 98.4375 (98.1421)LR: 2.462e-02
Train: 4 [ 250/390]  Loss: 0.6905 (0.754)  Acc@1: 70.3125 (73.3254)  Acc@5: 98.4375 (98.1947)LR: 2.462e-02
Train: 4 [ 300/390]  Loss: 0.8356 (0.752)  Acc@1: 65.6250 (73.4790)  Acc@5: 98.4375 (98.1987)LR: 2.462e-02
Train: 4 [ 350/390]  Loss: 0.7993 (0.754)  Acc@1: 71.8750 (73.5666)  Acc@5: 98.4375 (98.2194)LR: 2.462e-02
Train: 4 [ 390/390]  Loss: 0.7060 (0.752)  Acc@1: 70.0000 (73.7320)  Acc@5: 100.0000 (98.2040)LR: 2.462e-02
train_acc 73.732000
Valid: 4 [   0/390]  Loss: 0.8764 (0.876)  Acc@1: 71.8750 (71.8750)  Acc@5: 95.3125 (95.3125)
Valid: 4 [  50/390]  Loss: 0.7100 (0.754)  Acc@1: 75.0000 (74.1422)  Acc@5: 100.0000 (98.3150)
Valid: 4 [ 100/390]  Loss: 0.8225 (0.760)  Acc@1: 70.3125 (73.9790)  Acc@5: 95.3125 (98.1745)
Valid: 4 [ 150/390]  Loss: 0.6603 (0.746)  Acc@1: 78.1250 (74.6378)  Acc@5: 96.8750 (98.1478)
Valid: 4 [ 200/390]  Loss: 0.6155 (0.745)  Acc@1: 75.0000 (74.4325)  Acc@5: 98.4375 (98.1499)
Valid: 4 [ 250/390]  Loss: 0.5528 (0.749)  Acc@1: 79.6875 (74.2156)  Acc@5: 100.0000 (98.1511)
Valid: 4 [ 300/390]  Loss: 0.9161 (0.743)  Acc@1: 64.0625 (74.3511)  Acc@5: 96.8750 (98.2039)
Valid: 4 [ 350/390]  Loss: 0.5439 (0.744)  Acc@1: 82.8125 (74.2432)  Acc@5: 98.4375 (98.1660)
Valid: 4 [ 390/390]  Loss: 0.9233 (0.744)  Acc@1: 60.0000 (74.1640)  Acc@5: 100.0000 (98.1840)
valid_acc 74.164000
epoch = 4   
 genotype = Genotype(normal=[('sep_conv_5x5', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 1), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('sep_conv_5x5', 3), ('sep_conv_3x3', 1), ('sep_conv_5x5', 4)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('sep_conv_5x5', 1), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_3x3', 3), ('dil_conv_5x5', 2), ('dil_conv_5x5', 2), ('dil_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1260, 0.1232, 0.1032, 0.1175, 0.1317, 0.1388, 0.1305, 0.1291],
        [0.1359, 0.1135, 0.1013, 0.1104, 0.1361, 0.1361, 0.1334, 0.1334],
        [0.1297, 0.1260, 0.1032, 0.1150, 0.1275, 0.1337, 0.1334, 0.1316],
        [0.1314, 0.1169, 0.1034, 0.1135, 0.1381, 0.1368, 0.1306, 0.1293],
        [0.1401, 0.1135, 0.1012, 0.1177, 0.1314, 0.1326, 0.1264, 0.1370],
        [0.1328, 0.1231, 0.1033, 0.1124, 0.1320, 0.1355, 0.1278, 0.1332],
        [0.1360, 0.1149, 0.1022, 0.1107, 0.1353, 0.1421, 0.1292, 0.1297],
        [0.1375, 0.1141, 0.1011, 0.1133, 0.1339, 0.1337, 0.1326, 0.1337],
        [0.1429, 0.1102, 0.1011, 0.1107, 0.1327, 0.1391, 0.1305, 0.1328],
        [0.1395, 0.1217, 0.1053, 0.1155, 0.1348, 0.1307, 0.1266, 0.1259],
        [0.1364, 0.1120, 0.1018, 0.1107, 0.1422, 0.1338, 0.1298, 0.1333],
        [0.1468, 0.1091, 0.0999, 0.1139, 0.1317, 0.1354, 0.1254, 0.1377],
        [0.1474, 0.1061, 0.0999, 0.1089, 0.1376, 0.1356, 0.1335, 0.1311],
        [0.1468, 0.1031, 0.0972, 0.1029, 0.1385, 0.1417, 0.1282, 0.1417]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1220, 0.1338, 0.1214, 0.1217, 0.1263, 0.1258, 0.1232, 0.1258],
        [0.1293, 0.1279, 0.1195, 0.1270, 0.1209, 0.1292, 0.1220, 0.1241],
        [0.1232, 0.1311, 0.1195, 0.1215, 0.1220, 0.1300, 0.1270, 0.1257],
        [0.1265, 0.1284, 0.1203, 0.1218, 0.1226, 0.1321, 0.1276, 0.1208],
        [0.1296, 0.1244, 0.1085, 0.1214, 0.1244, 0.1278, 0.1288, 0.1352],
        [0.1222, 0.1317, 0.1186, 0.1246, 0.1263, 0.1272, 0.1227, 0.1266],
        [0.1229, 0.1292, 0.1191, 0.1237, 0.1271, 0.1304, 0.1236, 0.1241],
        [0.1279, 0.1237, 0.1055, 0.1178, 0.1296, 0.1327, 0.1286, 0.1342],
        [0.1291, 0.1203, 0.1069, 0.1188, 0.1310, 0.1273, 0.1348, 0.1318],
        [0.1250, 0.1310, 0.1189, 0.1217, 0.1211, 0.1315, 0.1258, 0.1250],
        [0.1257, 0.1301, 0.1217, 0.1247, 0.1260, 0.1259, 0.1285, 0.1175],
        [0.1228, 0.1234, 0.1087, 0.1192, 0.1289, 0.1351, 0.1263, 0.1356],
        [0.1287, 0.1219, 0.1093, 0.1196, 0.1306, 0.1278, 0.1274, 0.1347],
        [0.1244, 0.1232, 0.1104, 0.1210, 0.1255, 0.1323, 0.1320, 0.1312]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 5 [   0/390]  Loss: 0.5263 (0.526)  Acc@1: 82.8125 (82.8125)  Acc@5: 100.0000 (100.0000)LR: 2.441e-02
Train: 5 [  50/390]  Loss: 0.8298 (0.696)  Acc@1: 73.4375 (76.1642)  Acc@5: 98.4375 (98.1618)LR: 2.441e-02
Train: 5 [ 100/390]  Loss: 0.7503 (0.705)  Acc@1: 75.0000 (75.4332)  Acc@5: 100.0000 (98.2209)LR: 2.441e-02
Train: 5 [ 150/390]  Loss: 0.9614 (0.708)  Acc@1: 76.5625 (75.6209)  Acc@5: 95.3125 (98.2202)LR: 2.441e-02
Train: 5 [ 200/390]  Loss: 0.4825 (0.707)  Acc@1: 81.2500 (75.4742)  Acc@5: 100.0000 (98.2198)LR: 2.441e-02
Train: 5 [ 250/390]  Loss: 0.7075 (0.706)  Acc@1: 71.8750 (75.3797)  Acc@5: 100.0000 (98.2632)LR: 2.441e-02
Train: 5 [ 300/390]  Loss: 0.8511 (0.702)  Acc@1: 73.4375 (75.4776)  Acc@5: 95.3125 (98.3337)LR: 2.441e-02
Train: 5 [ 350/390]  Loss: 0.5454 (0.700)  Acc@1: 78.1250 (75.5653)  Acc@5: 98.4375 (98.3752)LR: 2.441e-02
Train: 5 [ 390/390]  Loss: 0.8417 (0.698)  Acc@1: 67.5000 (75.6280)  Acc@5: 100.0000 (98.4160)LR: 2.441e-02
train_acc 75.628000
Valid: 5 [   0/390]  Loss: 0.8443 (0.844)  Acc@1: 73.4375 (73.4375)  Acc@5: 98.4375 (98.4375)
Valid: 5 [  50/390]  Loss: 0.9501 (0.806)  Acc@1: 62.5000 (72.1507)  Acc@5: 98.4375 (97.9473)
Valid: 5 [ 100/390]  Loss: 0.6684 (0.767)  Acc@1: 70.3125 (73.0507)  Acc@5: 96.8750 (98.1436)
Valid: 5 [ 150/390]  Loss: 0.6704 (0.773)  Acc@1: 81.2500 (73.2099)  Acc@5: 98.4375 (98.0857)
Valid: 5 [ 200/390]  Loss: 0.6663 (0.774)  Acc@1: 76.5625 (73.2509)  Acc@5: 100.0000 (98.0022)
Valid: 5 [ 250/390]  Loss: 0.6918 (0.770)  Acc@1: 78.1250 (73.3130)  Acc@5: 96.8750 (98.0204)
Valid: 5 [ 300/390]  Loss: 1.067 (0.768)  Acc@1: 65.6250 (73.4479)  Acc@5: 95.3125 (98.0430)
Valid: 5 [ 350/390]  Loss: 0.6597 (0.765)  Acc@1: 75.0000 (73.5221)  Acc@5: 98.4375 (98.0146)
Valid: 5 [ 390/390]  Loss: 0.7743 (0.764)  Acc@1: 72.5000 (73.5360)  Acc@5: 100.0000 (98.0480)
valid_acc 73.536000
epoch = 5   
 genotype = Genotype(normal=[('sep_conv_5x5', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('sep_conv_5x5', 3), ('sep_conv_3x3', 1), ('dil_conv_5x5', 4)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('sep_conv_5x5', 1), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_3x3', 3), ('dil_conv_5x5', 2), ('dil_conv_5x5', 2), ('dil_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1268, 0.1205, 0.0988, 0.1140, 0.1349, 0.1411, 0.1317, 0.1321],
        [0.1362, 0.1116, 0.0985, 0.1088, 0.1386, 0.1367, 0.1350, 0.1345],
        [0.1316, 0.1238, 0.0994, 0.1126, 0.1286, 0.1350, 0.1363, 0.1328],
        [0.1320, 0.1150, 0.1005, 0.1121, 0.1433, 0.1386, 0.1296, 0.1288],
        [0.1425, 0.1112, 0.0979, 0.1167, 0.1325, 0.1327, 0.1277, 0.1389],
        [0.1365, 0.1215, 0.0998, 0.1103, 0.1330, 0.1359, 0.1284, 0.1345],
        [0.1367, 0.1130, 0.0994, 0.1089, 0.1371, 0.1461, 0.1296, 0.1292],
        [0.1403, 0.1123, 0.0980, 0.1124, 0.1335, 0.1344, 0.1342, 0.1348],
        [0.1461, 0.1070, 0.0975, 0.1082, 0.1324, 0.1401, 0.1342, 0.1346],
        [0.1430, 0.1193, 0.1019, 0.1133, 0.1379, 0.1310, 0.1270, 0.1267],
        [0.1369, 0.1100, 0.0996, 0.1099, 0.1453, 0.1328, 0.1323, 0.1331],
        [0.1505, 0.1070, 0.0972, 0.1134, 0.1335, 0.1349, 0.1253, 0.1382],
        [0.1516, 0.1033, 0.0967, 0.1071, 0.1388, 0.1360, 0.1337, 0.1329],
        [0.1510, 0.0997, 0.0935, 0.0997, 0.1418, 0.1431, 0.1280, 0.1432]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1228, 0.1349, 0.1210, 0.1216, 0.1262, 0.1247, 0.1225, 0.1262],
        [0.1281, 0.1278, 0.1188, 0.1258, 0.1213, 0.1295, 0.1229, 0.1258],
        [0.1232, 0.1321, 0.1194, 0.1205, 0.1222, 0.1311, 0.1263, 0.1252],
        [0.1274, 0.1297, 0.1204, 0.1198, 0.1217, 0.1327, 0.1283, 0.1200],
        [0.1293, 0.1244, 0.1066, 0.1214, 0.1251, 0.1284, 0.1296, 0.1353],
        [0.1223, 0.1326, 0.1179, 0.1257, 0.1259, 0.1279, 0.1212, 0.1267],
        [0.1223, 0.1304, 0.1192, 0.1233, 0.1259, 0.1305, 0.1237, 0.1246],
        [0.1281, 0.1228, 0.1034, 0.1181, 0.1320, 0.1334, 0.1284, 0.1337],
        [0.1302, 0.1194, 0.1047, 0.1181, 0.1313, 0.1280, 0.1365, 0.1317],
        [0.1255, 0.1323, 0.1191, 0.1215, 0.1202, 0.1319, 0.1249, 0.1245],
        [0.1255, 0.1308, 0.1219, 0.1245, 0.1248, 0.1269, 0.1279, 0.1178],
        [0.1223, 0.1223, 0.1069, 0.1189, 0.1306, 0.1357, 0.1265, 0.1367],
        [0.1294, 0.1211, 0.1071, 0.1187, 0.1310, 0.1286, 0.1297, 0.1345],
        [0.1242, 0.1214, 0.1075, 0.1193, 0.1278, 0.1338, 0.1334, 0.1326]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 6 [   0/390]  Loss: 0.6260 (0.626)  Acc@1: 78.1250 (78.1250)  Acc@5: 100.0000 (100.0000)LR: 2.416e-02
Train: 6 [  50/390]  Loss: 0.8717 (0.636)  Acc@1: 67.1875 (78.1863)  Acc@5: 98.4375 (98.8664)LR: 2.416e-02
Train: 6 [ 100/390]  Loss: 0.6831 (0.629)  Acc@1: 71.8750 (78.3106)  Acc@5: 96.8750 (98.8707)LR: 2.416e-02
Train: 6 [ 150/390]  Loss: 0.4800 (0.626)  Acc@1: 81.2500 (78.5493)  Acc@5: 100.0000 (98.8307)LR: 2.416e-02
Train: 6 [ 200/390]  Loss: 0.6505 (0.632)  Acc@1: 78.1250 (78.3271)  Acc@5: 98.4375 (98.7718)LR: 2.416e-02
Train: 6 [ 250/390]  Loss: 0.5592 (0.637)  Acc@1: 78.1250 (78.0129)  Acc@5: 98.4375 (98.7488)LR: 2.416e-02
Train: 6 [ 300/390]  Loss: 0.5688 (0.640)  Acc@1: 78.1250 (77.8447)  Acc@5: 100.0000 (98.7178)LR: 2.416e-02
Train: 6 [ 350/390]  Loss: 0.7533 (0.641)  Acc@1: 78.1250 (77.7555)  Acc@5: 96.8750 (98.6957)LR: 2.416e-02
Train: 6 [ 390/390]  Loss: 0.5543 (0.641)  Acc@1: 75.0000 (77.7400)  Acc@5: 100.0000 (98.7160)LR: 2.416e-02
train_acc 77.740000
Valid: 6 [   0/390]  Loss: 0.8249 (0.825)  Acc@1: 71.8750 (71.8750)  Acc@5: 96.8750 (96.8750)
Valid: 6 [  50/390]  Loss: 0.8278 (0.706)  Acc@1: 73.4375 (75.0306)  Acc@5: 93.7500 (98.5600)
Valid: 6 [ 100/390]  Loss: 0.4725 (0.706)  Acc@1: 81.2500 (75.3713)  Acc@5: 100.0000 (98.3911)
Valid: 6 [ 150/390]  Loss: 0.5962 (0.695)  Acc@1: 79.6875 (75.6209)  Acc@5: 100.0000 (98.4582)
Valid: 6 [ 200/390]  Loss: 0.7111 (0.694)  Acc@1: 76.5625 (75.8473)  Acc@5: 98.4375 (98.4841)
Valid: 6 [ 250/390]  Loss: 0.7813 (0.700)  Acc@1: 75.0000 (75.7595)  Acc@5: 98.4375 (98.4375)
Valid: 6 [ 300/390]  Loss: 0.4897 (0.692)  Acc@1: 82.8125 (75.9967)  Acc@5: 100.0000 (98.4531)
Valid: 6 [ 350/390]  Loss: 0.5104 (0.689)  Acc@1: 81.2500 (76.1351)  Acc@5: 96.8750 (98.4598)
Valid: 6 [ 390/390]  Loss: 0.7965 (0.687)  Acc@1: 67.5000 (76.3200)  Acc@5: 97.5000 (98.4640)
valid_acc 76.320000
epoch = 6   
 genotype = Genotype(normal=[('sep_conv_5x5', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('sep_conv_5x5', 3), ('sep_conv_3x3', 1), ('dil_conv_5x5', 4)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('sep_conv_5x5', 1), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('dil_conv_3x3', 3), ('sep_conv_5x5', 2), ('sep_conv_5x5', 2), ('dil_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1288, 0.1183, 0.0953, 0.1123, 0.1363, 0.1425, 0.1339, 0.1326],
        [0.1360, 0.1092, 0.0951, 0.1065, 0.1422, 0.1383, 0.1366, 0.1360],
        [0.1340, 0.1219, 0.0958, 0.1109, 0.1292, 0.1378, 0.1374, 0.1331],
        [0.1326, 0.1126, 0.0969, 0.1097, 0.1468, 0.1417, 0.1299, 0.1298],
        [0.1456, 0.1084, 0.0940, 0.1152, 0.1348, 0.1340, 0.1287, 0.1393],
        [0.1399, 0.1197, 0.0965, 0.1087, 0.1329, 0.1379, 0.1287, 0.1356],
        [0.1376, 0.1111, 0.0964, 0.1069, 0.1403, 0.1499, 0.1292, 0.1285],
        [0.1448, 0.1094, 0.0945, 0.1114, 0.1343, 0.1347, 0.1360, 0.1348],
        [0.1505, 0.1041, 0.0937, 0.1059, 0.1349, 0.1405, 0.1353, 0.1350],
        [0.1468, 0.1172, 0.0982, 0.1111, 0.1394, 0.1320, 0.1273, 0.1279],
        [0.1386, 0.1080, 0.0970, 0.1090, 0.1486, 0.1339, 0.1317, 0.1332],
        [0.1553, 0.1045, 0.0936, 0.1124, 0.1334, 0.1354, 0.1269, 0.1385],
        [0.1563, 0.1002, 0.0931, 0.1048, 0.1400, 0.1367, 0.1343, 0.1346],
        [0.1562, 0.0962, 0.0892, 0.0964, 0.1433, 0.1437, 0.1289, 0.1460]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1221, 0.1367, 0.1204, 0.1204, 0.1274, 0.1245, 0.1216, 0.1268],
        [0.1293, 0.1279, 0.1185, 0.1248, 0.1215, 0.1294, 0.1228, 0.1258],
        [0.1224, 0.1336, 0.1190, 0.1191, 0.1220, 0.1321, 0.1270, 0.1248],
        [0.1282, 0.1301, 0.1203, 0.1195, 0.1209, 0.1325, 0.1295, 0.1191],
        [0.1309, 0.1235, 0.1041, 0.1211, 0.1253, 0.1287, 0.1294, 0.1370],
        [0.1206, 0.1341, 0.1174, 0.1270, 0.1251, 0.1288, 0.1203, 0.1265],
        [0.1220, 0.1305, 0.1187, 0.1221, 0.1262, 0.1307, 0.1249, 0.1248],
        [0.1282, 0.1219, 0.1012, 0.1176, 0.1353, 0.1353, 0.1277, 0.1327],
        [0.1312, 0.1178, 0.1023, 0.1173, 0.1321, 0.1272, 0.1381, 0.1341],
        [0.1253, 0.1338, 0.1190, 0.1205, 0.1193, 0.1320, 0.1246, 0.1254],
        [0.1255, 0.1323, 0.1229, 0.1230, 0.1252, 0.1274, 0.1269, 0.1168],
        [0.1221, 0.1209, 0.1047, 0.1186, 0.1307, 0.1385, 0.1265, 0.1380],
        [0.1294, 0.1195, 0.1048, 0.1187, 0.1323, 0.1283, 0.1314, 0.1357],
        [0.1240, 0.1202, 0.1056, 0.1195, 0.1288, 0.1351, 0.1337, 0.1331]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 7 [   0/390]  Loss: 0.3955 (0.396)  Acc@1: 89.0625 (89.0625)  Acc@5: 100.0000 (100.0000)LR: 2.386e-02
Train: 7 [  50/390]  Loss: 0.6673 (0.594)  Acc@1: 75.0000 (79.3505)  Acc@5: 100.0000 (99.0809)LR: 2.386e-02
Train: 7 [ 100/390]  Loss: 0.2889 (0.599)  Acc@1: 87.5000 (79.4090)  Acc@5: 100.0000 (98.9325)LR: 2.386e-02
Train: 7 [ 150/390]  Loss: 0.8404 (0.612)  Acc@1: 67.1875 (79.0873)  Acc@5: 98.4375 (98.8100)LR: 2.386e-02
Train: 7 [ 200/390]  Loss: 0.5899 (0.610)  Acc@1: 76.5625 (78.8013)  Acc@5: 98.4375 (98.8417)LR: 2.386e-02
Train: 7 [ 250/390]  Loss: 0.3019 (0.601)  Acc@1: 89.0625 (79.2393)  Acc@5: 100.0000 (98.8919)LR: 2.386e-02
Train: 7 [ 300/390]  Loss: 0.6526 (0.602)  Acc@1: 76.5625 (79.2307)  Acc@5: 98.4375 (98.8684)LR: 2.386e-02
Train: 7 [ 350/390]  Loss: 0.6155 (0.598)  Acc@1: 79.6875 (79.3358)  Acc@5: 98.4375 (98.8337)LR: 2.386e-02
Train: 7 [ 390/390]  Loss: 0.6539 (0.598)  Acc@1: 72.5000 (79.2760)  Acc@5: 100.0000 (98.8200)LR: 2.386e-02
train_acc 79.276000
Valid: 7 [   0/390]  Loss: 0.7191 (0.719)  Acc@1: 75.0000 (75.0000)  Acc@5: 96.8750 (96.8750)
Valid: 7 [  50/390]  Loss: 0.4947 (0.667)  Acc@1: 85.9375 (77.8493)  Acc@5: 98.4375 (98.6213)
Valid: 7 [ 100/390]  Loss: 0.5589 (0.652)  Acc@1: 76.5625 (77.8465)  Acc@5: 96.8750 (98.6386)
Valid: 7 [ 150/390]  Loss: 0.7801 (0.654)  Acc@1: 73.4375 (77.8767)  Acc@5: 100.0000 (98.6548)
Valid: 7 [ 200/390]  Loss: 0.6976 (0.650)  Acc@1: 75.0000 (77.8451)  Acc@5: 100.0000 (98.7562)
Valid: 7 [ 250/390]  Loss: 0.6516 (0.648)  Acc@1: 75.0000 (77.9133)  Acc@5: 98.4375 (98.7176)
Valid: 7 [ 300/390]  Loss: 0.4605 (0.650)  Acc@1: 85.9375 (77.7045)  Acc@5: 98.4375 (98.7386)
Valid: 7 [ 350/390]  Loss: 0.8866 (0.655)  Acc@1: 71.8750 (77.4795)  Acc@5: 98.4375 (98.7179)
Valid: 7 [ 390/390]  Loss: 0.7153 (0.654)  Acc@1: 75.0000 (77.5520)  Acc@5: 100.0000 (98.7400)
valid_acc 77.552000
epoch = 7   
 genotype = Genotype(normal=[('sep_conv_3x3', 1), ('sep_conv_5x5', 0), ('sep_conv_3x3', 1), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('sep_conv_5x5', 3), ('sep_conv_3x3', 1), ('dil_conv_5x5', 4)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('sep_conv_5x5', 1), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('dil_conv_3x3', 3), ('sep_conv_5x5', 2), ('dil_conv_5x5', 2), ('dil_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1308, 0.1150, 0.0915, 0.1096, 0.1387, 0.1443, 0.1360, 0.1340],
        [0.1369, 0.1062, 0.0923, 0.1044, 0.1464, 0.1399, 0.1368, 0.1371],
        [0.1369, 0.1191, 0.0921, 0.1088, 0.1315, 0.1390, 0.1383, 0.1343],
        [0.1345, 0.1101, 0.0942, 0.1081, 0.1489, 0.1434, 0.1299, 0.1309],
        [0.1487, 0.1053, 0.0904, 0.1132, 0.1359, 0.1361, 0.1311, 0.1393],
        [0.1441, 0.1166, 0.0931, 0.1064, 0.1350, 0.1398, 0.1281, 0.1368],
        [0.1391, 0.1086, 0.0939, 0.1054, 0.1419, 0.1540, 0.1294, 0.1277],
        [0.1490, 0.1063, 0.0908, 0.1093, 0.1364, 0.1370, 0.1365, 0.1347],
        [0.1555, 0.0998, 0.0894, 0.1025, 0.1361, 0.1424, 0.1371, 0.1373],
        [0.1514, 0.1142, 0.0943, 0.1081, 0.1422, 0.1336, 0.1283, 0.1279],
        [0.1408, 0.1057, 0.0944, 0.1075, 0.1511, 0.1334, 0.1316, 0.1355],
        [0.1615, 0.1018, 0.0899, 0.1108, 0.1334, 0.1352, 0.1289, 0.1385],
        [0.1633, 0.0966, 0.0888, 0.1018, 0.1405, 0.1374, 0.1345, 0.1372],
        [0.1609, 0.0924, 0.0849, 0.0926, 0.1446, 0.1458, 0.1291, 0.1496]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1207, 0.1377, 0.1197, 0.1187, 0.1285, 0.1256, 0.1222, 0.1269],
        [0.1304, 0.1273, 0.1171, 0.1240, 0.1204, 0.1295, 0.1237, 0.1276],
        [0.1226, 0.1349, 0.1188, 0.1177, 0.1210, 0.1341, 0.1268, 0.1241],
        [0.1273, 0.1301, 0.1198, 0.1200, 0.1220, 0.1330, 0.1309, 0.1169],
        [0.1324, 0.1237, 0.1025, 0.1222, 0.1245, 0.1295, 0.1286, 0.1365],
        [0.1199, 0.1357, 0.1175, 0.1271, 0.1251, 0.1283, 0.1203, 0.1262],
        [0.1214, 0.1313, 0.1190, 0.1228, 0.1248, 0.1316, 0.1247, 0.1244],
        [0.1287, 0.1225, 0.1003, 0.1192, 0.1359, 0.1362, 0.1269, 0.1303],
        [0.1312, 0.1170, 0.1009, 0.1179, 0.1326, 0.1267, 0.1386, 0.1352],
        [0.1243, 0.1351, 0.1193, 0.1204, 0.1173, 0.1339, 0.1252, 0.1246],
        [0.1251, 0.1331, 0.1233, 0.1235, 0.1244, 0.1281, 0.1255, 0.1169],
        [0.1220, 0.1215, 0.1042, 0.1203, 0.1302, 0.1379, 0.1258, 0.1380],
        [0.1292, 0.1184, 0.1034, 0.1196, 0.1331, 0.1277, 0.1329, 0.1357],
        [0.1259, 0.1196, 0.1043, 0.1205, 0.1282, 0.1345, 0.1341, 0.1329]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 8 [   0/390]  Loss: 0.5758 (0.576)  Acc@1: 78.1250 (78.1250)  Acc@5: 100.0000 (100.0000)LR: 2.352e-02
Train: 8 [  50/390]  Loss: 0.6665 (0.550)  Acc@1: 79.6875 (80.3002)  Acc@5: 100.0000 (99.4179)LR: 2.352e-02
Train: 8 [ 100/390]  Loss: 0.6779 (0.568)  Acc@1: 78.1250 (80.3373)  Acc@5: 98.4375 (99.0873)LR: 2.352e-02
Train: 8 [ 150/390]  Loss: 0.3918 (0.563)  Acc@1: 87.5000 (80.6188)  Acc@5: 100.0000 (99.0687)LR: 2.352e-02
Train: 8 [ 200/390]  Loss: 0.6121 (0.566)  Acc@1: 82.8125 (80.5815)  Acc@5: 98.4375 (99.0594)LR: 2.352e-02
Train: 8 [ 250/390]  Loss: 0.5240 (0.571)  Acc@1: 79.6875 (80.4532)  Acc@5: 100.0000 (99.0164)LR: 2.352e-02
Train: 8 [ 300/390]  Loss: 0.4176 (0.572)  Acc@1: 85.9375 (80.4142)  Acc@5: 100.0000 (99.0189)LR: 2.352e-02
Train: 8 [ 350/390]  Loss: 0.7705 (0.572)  Acc@1: 78.1250 (80.3953)  Acc@5: 98.4375 (99.0073)LR: 2.352e-02
Train: 8 [ 390/390]  Loss: 0.4656 (0.571)  Acc@1: 85.0000 (80.4600)  Acc@5: 97.5000 (99.0080)LR: 2.352e-02
train_acc 80.460000
Valid: 8 [   0/390]  Loss: 0.8210 (0.821)  Acc@1: 71.8750 (71.8750)  Acc@5: 98.4375 (98.4375)
Valid: 8 [  50/390]  Loss: 0.3854 (0.619)  Acc@1: 87.5000 (78.5539)  Acc@5: 100.0000 (98.5907)
Valid: 8 [ 100/390]  Loss: 0.4235 (0.611)  Acc@1: 82.8125 (79.0068)  Acc@5: 98.4375 (98.4066)
Valid: 8 [ 150/390]  Loss: 0.4754 (0.609)  Acc@1: 81.2500 (78.8493)  Acc@5: 96.8750 (98.5410)
Valid: 8 [ 200/390]  Loss: 0.6249 (0.614)  Acc@1: 76.5625 (78.6147)  Acc@5: 100.0000 (98.5774)
Valid: 8 [ 250/390]  Loss: 0.6572 (0.616)  Acc@1: 81.2500 (78.4798)  Acc@5: 96.8750 (98.6429)
Valid: 8 [ 300/390]  Loss: 0.5662 (0.615)  Acc@1: 84.3750 (78.5247)  Acc@5: 98.4375 (98.6296)
Valid: 8 [ 350/390]  Loss: 0.6729 (0.613)  Acc@1: 78.1250 (78.6369)  Acc@5: 96.8750 (98.6156)
Valid: 8 [ 390/390]  Loss: 0.5693 (0.615)  Acc@1: 80.0000 (78.5880)  Acc@5: 97.5000 (98.6120)
valid_acc 78.588000
epoch = 8   
 genotype = Genotype(normal=[('sep_conv_3x3', 1), ('sep_conv_5x5', 0), ('sep_conv_3x3', 1), ('sep_conv_5x5', 0), ('sep_conv_5x5', 1), ('sep_conv_5x5', 3), ('sep_conv_3x3', 1), ('dil_conv_5x5', 4)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('dil_conv_5x5', 1), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('dil_conv_3x3', 3), ('sep_conv_3x3', 2), ('dil_conv_5x5', 2), ('max_pool_3x3', 0)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1319, 0.1115, 0.0886, 0.1077, 0.1417, 0.1479, 0.1359, 0.1348],
        [0.1375, 0.1035, 0.0898, 0.1031, 0.1500, 0.1407, 0.1366, 0.1388],
        [0.1388, 0.1162, 0.0896, 0.1073, 0.1340, 0.1404, 0.1388, 0.1350],
        [0.1353, 0.1075, 0.0921, 0.1070, 0.1531, 0.1462, 0.1281, 0.1306],
        [0.1523, 0.1024, 0.0878, 0.1126, 0.1370, 0.1372, 0.1325, 0.1383],
        [0.1471, 0.1131, 0.0908, 0.1051, 0.1369, 0.1401, 0.1284, 0.1384],
        [0.1405, 0.1052, 0.0919, 0.1040, 0.1433, 0.1582, 0.1291, 0.1277],
        [0.1518, 0.1028, 0.0877, 0.1076, 0.1381, 0.1390, 0.1378, 0.1352],
        [0.1596, 0.0960, 0.0861, 0.0997, 0.1380, 0.1435, 0.1373, 0.1398],
        [0.1557, 0.1115, 0.0921, 0.1070, 0.1434, 0.1330, 0.1296, 0.1277],
        [0.1431, 0.1037, 0.0926, 0.1068, 0.1528, 0.1326, 0.1316, 0.1367],
        [0.1673, 0.0991, 0.0873, 0.1102, 0.1334, 0.1362, 0.1290, 0.1375],
        [0.1689, 0.0935, 0.0857, 0.0993, 0.1403, 0.1373, 0.1353, 0.1396],
        [0.1658, 0.0894, 0.0819, 0.0902, 0.1465, 0.1467, 0.1296, 0.1498]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1196, 0.1375, 0.1186, 0.1180, 0.1312, 0.1259, 0.1220, 0.1271],
        [0.1311, 0.1279, 0.1169, 0.1223, 0.1209, 0.1284, 0.1239, 0.1285],
        [0.1215, 0.1356, 0.1185, 0.1183, 0.1212, 0.1347, 0.1269, 0.1233],
        [0.1275, 0.1303, 0.1199, 0.1211, 0.1220, 0.1325, 0.1313, 0.1154],
        [0.1331, 0.1235, 0.1025, 0.1227, 0.1223, 0.1289, 0.1287, 0.1384],
        [0.1193, 0.1369, 0.1175, 0.1278, 0.1243, 0.1292, 0.1185, 0.1265],
        [0.1200, 0.1318, 0.1193, 0.1230, 0.1243, 0.1316, 0.1252, 0.1250],
        [0.1287, 0.1217, 0.1003, 0.1198, 0.1386, 0.1357, 0.1256, 0.1296],
        [0.1312, 0.1158, 0.0993, 0.1168, 0.1335, 0.1270, 0.1400, 0.1363],
        [0.1241, 0.1361, 0.1196, 0.1198, 0.1170, 0.1338, 0.1257, 0.1240],
        [0.1252, 0.1350, 0.1249, 0.1223, 0.1241, 0.1273, 0.1250, 0.1162],
        [0.1234, 0.1212, 0.1047, 0.1226, 0.1286, 0.1369, 0.1248, 0.1379],
        [0.1297, 0.1180, 0.1027, 0.1200, 0.1345, 0.1271, 0.1324, 0.1355],
        [0.1272, 0.1198, 0.1043, 0.1216, 0.1288, 0.1336, 0.1331, 0.1315]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 9 [   0/390]  Loss: 0.3849 (0.385)  Acc@1: 87.5000 (87.5000)  Acc@5: 98.4375 (98.4375)LR: 2.313e-02
Train: 9 [  50/390]  Loss: 0.4557 (0.514)  Acc@1: 79.6875 (82.3223)  Acc@5: 100.0000 (99.2647)LR: 2.313e-02
Train: 9 [ 100/390]  Loss: 0.4792 (0.513)  Acc@1: 87.5000 (82.0235)  Acc@5: 98.4375 (99.3193)LR: 2.313e-02
Train: 9 [ 150/390]  Loss: 0.4986 (0.522)  Acc@1: 84.3750 (81.5811)  Acc@5: 100.0000 (99.2860)LR: 2.313e-02
Train: 9 [ 200/390]  Loss: 0.4678 (0.531)  Acc@1: 84.3750 (81.2966)  Acc@5: 100.0000 (99.2304)LR: 2.313e-02
Train: 9 [ 250/390]  Loss: 0.4631 (0.528)  Acc@1: 84.3750 (81.4368)  Acc@5: 98.4375 (99.2219)LR: 2.313e-02
Train: 9 [ 300/390]  Loss: 0.3395 (0.533)  Acc@1: 90.6250 (81.2552)  Acc@5: 100.0000 (99.2006)LR: 2.313e-02
Train: 9 [ 350/390]  Loss: 0.5660 (0.536)  Acc@1: 79.6875 (81.2366)  Acc@5: 100.0000 (99.1319)LR: 2.313e-02
Train: 9 [ 390/390]  Loss: 0.7139 (0.536)  Acc@1: 70.0000 (81.2320)  Acc@5: 97.5000 (99.1640)LR: 2.313e-02
train_acc 81.232000
Valid: 9 [   0/390]  Loss: 0.7101 (0.710)  Acc@1: 71.8750 (71.8750)  Acc@5: 100.0000 (100.0000)
Valid: 9 [  50/390]  Loss: 0.7331 (0.600)  Acc@1: 78.1250 (80.4841)  Acc@5: 100.0000 (98.7439)
Valid: 9 [ 100/390]  Loss: 0.5229 (0.589)  Acc@1: 82.8125 (80.7704)  Acc@5: 100.0000 (98.8088)
Valid: 9 [ 150/390]  Loss: 0.5631 (0.594)  Acc@1: 81.2500 (80.2566)  Acc@5: 100.0000 (98.7893)
Valid: 9 [ 200/390]  Loss: 0.6021 (0.602)  Acc@1: 73.4375 (79.8274)  Acc@5: 100.0000 (98.7407)
Valid: 9 [ 250/390]  Loss: 0.6073 (0.596)  Acc@1: 79.6875 (79.9676)  Acc@5: 100.0000 (98.7861)
Valid: 9 [ 300/390]  Loss: 0.7742 (0.604)  Acc@1: 73.4375 (79.5733)  Acc@5: 98.4375 (98.8113)
Valid: 9 [ 350/390]  Loss: 0.6484 (0.604)  Acc@1: 79.6875 (79.5940)  Acc@5: 96.8750 (98.7758)
Valid: 9 [ 390/390]  Loss: 0.7355 (0.601)  Acc@1: 70.0000 (79.6640)  Acc@5: 100.0000 (98.7960)
valid_acc 79.664000
epoch = 9   
 genotype = Genotype(normal=[('sep_conv_3x3', 1), ('sep_conv_5x5', 0), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_5x5', 3), ('sep_conv_3x3', 1), ('dil_conv_5x5', 4)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('sep_conv_3x3', 2), ('dil_conv_3x3', 3), ('max_pool_3x3', 0), ('dil_conv_5x5', 2)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1335, 0.1082, 0.0846, 0.1050, 0.1447, 0.1493, 0.1391, 0.1356],
        [0.1387, 0.1005, 0.0866, 0.1013, 0.1545, 0.1424, 0.1369, 0.1391],
        [0.1409, 0.1135, 0.0861, 0.1048, 0.1352, 0.1410, 0.1410, 0.1376],
        [0.1376, 0.1047, 0.0893, 0.1053, 0.1575, 0.1475, 0.1275, 0.1306],
        [0.1574, 0.0989, 0.0838, 0.1105, 0.1387, 0.1377, 0.1343, 0.1387],
        [0.1513, 0.1093, 0.0866, 0.1021, 0.1395, 0.1429, 0.1293, 0.1390],
        [0.1423, 0.1020, 0.0886, 0.1014, 0.1457, 0.1647, 0.1291, 0.1261],
        [0.1581, 0.0996, 0.0838, 0.1059, 0.1381, 0.1397, 0.1401, 0.1347],
        [0.1651, 0.0921, 0.0819, 0.0961, 0.1408, 0.1433, 0.1397, 0.1410],
        [0.1606, 0.1089, 0.0887, 0.1048, 0.1448, 0.1344, 0.1298, 0.1281],
        [0.1473, 0.1008, 0.0897, 0.1050, 0.1542, 0.1323, 0.1326, 0.1380],
        [0.1754, 0.0962, 0.0836, 0.1090, 0.1336, 0.1352, 0.1301, 0.1370],
        [0.1772, 0.0900, 0.0818, 0.0966, 0.1405, 0.1375, 0.1349, 0.1417],
        [0.1734, 0.0857, 0.0777, 0.0867, 0.1469, 0.1455, 0.1305, 0.1537]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1187, 0.1414, 0.1202, 0.1173, 0.1310, 0.1243, 0.1203, 0.1268],
        [0.1320, 0.1291, 0.1173, 0.1215, 0.1206, 0.1282, 0.1230, 0.1282],
        [0.1201, 0.1383, 0.1195, 0.1171, 0.1205, 0.1354, 0.1264, 0.1227],
        [0.1276, 0.1313, 0.1199, 0.1202, 0.1223, 0.1329, 0.1319, 0.1140],
        [0.1345, 0.1211, 0.1004, 0.1225, 0.1238, 0.1278, 0.1297, 0.1401],
        [0.1185, 0.1395, 0.1179, 0.1280, 0.1243, 0.1292, 0.1182, 0.1245],
        [0.1189, 0.1326, 0.1194, 0.1233, 0.1241, 0.1305, 0.1257, 0.1256],
        [0.1297, 0.1190, 0.0979, 0.1199, 0.1413, 0.1367, 0.1266, 0.1290],
        [0.1324, 0.1143, 0.0973, 0.1166, 0.1336, 0.1273, 0.1406, 0.1378],
        [0.1221, 0.1403, 0.1217, 0.1186, 0.1163, 0.1336, 0.1251, 0.1223],
        [0.1244, 0.1364, 0.1256, 0.1223, 0.1237, 0.1272, 0.1243, 0.1161],
        [0.1254, 0.1200, 0.1034, 0.1245, 0.1296, 0.1361, 0.1235, 0.1376],
        [0.1301, 0.1178, 0.1018, 0.1212, 0.1348, 0.1253, 0.1336, 0.1354],
        [0.1281, 0.1194, 0.1036, 0.1229, 0.1283, 0.1351, 0.1324, 0.1302]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 10 [   0/390]  Loss: 0.5704 (0.570)  Acc@1: 75.0000 (75.0000)  Acc@5: 98.4375 (98.4375)LR: 2.271e-02
Train: 10 [  50/390]  Loss: 0.4191 (0.477)  Acc@1: 87.5000 (82.9350)  Acc@5: 98.4375 (99.3566)LR: 2.271e-02
Train: 10 [ 100/390]  Loss: 0.4353 (0.495)  Acc@1: 79.6875 (82.4257)  Acc@5: 100.0000 (99.2574)LR: 2.271e-02
Train: 10 [ 150/390]  Loss: 0.6615 (0.498)  Acc@1: 78.1250 (82.5228)  Acc@5: 98.4375 (99.1618)LR: 2.271e-02
Train: 10 [ 200/390]  Loss: 0.3706 (0.504)  Acc@1: 87.5000 (82.4394)  Acc@5: 100.0000 (99.1993)LR: 2.271e-02
Train: 10 [ 250/390]  Loss: 0.3547 (0.506)  Acc@1: 90.6250 (82.4452)  Acc@5: 100.0000 (99.2032)LR: 2.271e-02
Train: 10 [ 300/390]  Loss: 0.4288 (0.509)  Acc@1: 85.9375 (82.3505)  Acc@5: 96.8750 (99.2162)LR: 2.271e-02
Train: 10 [ 350/390]  Loss: 0.4978 (0.507)  Acc@1: 82.8125 (82.4564)  Acc@5: 100.0000 (99.2165)LR: 2.271e-02
Train: 10 [ 390/390]  Loss: 0.3739 (0.508)  Acc@1: 87.5000 (82.4600)  Acc@5: 100.0000 (99.2360)LR: 2.271e-02
train_acc 82.460000
Valid: 10 [   0/390]  Loss: 0.6270 (0.627)  Acc@1: 85.9375 (85.9375)  Acc@5: 100.0000 (100.0000)
Valid: 10 [  50/390]  Loss: 0.4438 (0.583)  Acc@1: 85.9375 (80.5453)  Acc@5: 98.4375 (98.8664)
Valid: 10 [ 100/390]  Loss: 0.5920 (0.584)  Acc@1: 78.1250 (80.1825)  Acc@5: 100.0000 (98.9790)
Valid: 10 [ 150/390]  Loss: 0.6079 (0.588)  Acc@1: 78.1250 (80.1118)  Acc@5: 98.4375 (98.8411)
Valid: 10 [ 200/390]  Loss: 0.3444 (0.590)  Acc@1: 90.6250 (80.0840)  Acc@5: 100.0000 (98.8262)
Valid: 10 [ 250/390]  Loss: 0.6344 (0.592)  Acc@1: 82.8125 (80.1357)  Acc@5: 100.0000 (98.8048)
Valid: 10 [ 300/390]  Loss: 0.6275 (0.593)  Acc@1: 82.8125 (80.1755)  Acc@5: 96.8750 (98.7853)
Valid: 10 [ 350/390]  Loss: 0.4594 (0.595)  Acc@1: 85.9375 (80.1905)  Acc@5: 100.0000 (98.7669)
Valid: 10 [ 390/390]  Loss: 0.4909 (0.593)  Acc@1: 82.5000 (80.2480)  Acc@5: 100.0000 (98.7680)
valid_acc 80.248000
epoch = 10   
 genotype = Genotype(normal=[('sep_conv_3x3', 1), ('sep_conv_5x5', 0), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_5x5', 0), ('sep_conv_3x3', 1), ('dil_conv_5x5', 4)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_3x3', 2), ('max_pool_3x3', 0), ('max_pool_3x3', 0), ('max_pool_3x3', 1)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1355, 0.1056, 0.0825, 0.1041, 0.1483, 0.1503, 0.1391, 0.1345],
        [0.1390, 0.0980, 0.0843, 0.1000, 0.1587, 0.1429, 0.1377, 0.1395],
        [0.1436, 0.1109, 0.0836, 0.1032, 0.1374, 0.1409, 0.1430, 0.1374],
        [0.1396, 0.1027, 0.0875, 0.1045, 0.1606, 0.1469, 0.1275, 0.1307],
        [0.1610, 0.0960, 0.0809, 0.1091, 0.1412, 0.1382, 0.1347, 0.1389],
        [0.1552, 0.1069, 0.0844, 0.1006, 0.1418, 0.1437, 0.1289, 0.1386],
        [0.1440, 0.0998, 0.0868, 0.1005, 0.1476, 0.1674, 0.1290, 0.1248],
        [0.1616, 0.0965, 0.0809, 0.1045, 0.1396, 0.1408, 0.1400, 0.1362],
        [0.1713, 0.0887, 0.0785, 0.0935, 0.1422, 0.1429, 0.1407, 0.1423],
        [0.1654, 0.1061, 0.0864, 0.1033, 0.1460, 0.1344, 0.1304, 0.1279],
        [0.1499, 0.0981, 0.0871, 0.1032, 0.1580, 0.1322, 0.1336, 0.1377],
        [0.1830, 0.0930, 0.0806, 0.1078, 0.1340, 0.1348, 0.1304, 0.1364],
        [0.1846, 0.0865, 0.0784, 0.0936, 0.1408, 0.1377, 0.1357, 0.1426],
        [0.1805, 0.0821, 0.0741, 0.0834, 0.1481, 0.1453, 0.1314, 0.1552]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1167, 0.1437, 0.1215, 0.1167, 0.1309, 0.1243, 0.1198, 0.1266],
        [0.1331, 0.1296, 0.1174, 0.1213, 0.1191, 0.1274, 0.1235, 0.1286],
        [0.1182, 0.1403, 0.1205, 0.1165, 0.1191, 0.1350, 0.1273, 0.1231],
        [0.1281, 0.1339, 0.1215, 0.1199, 0.1200, 0.1314, 0.1332, 0.1121],
        [0.1344, 0.1210, 0.0994, 0.1231, 0.1239, 0.1289, 0.1301, 0.1391],
        [0.1177, 0.1421, 0.1195, 0.1283, 0.1241, 0.1280, 0.1182, 0.1222],
        [0.1179, 0.1337, 0.1205, 0.1242, 0.1239, 0.1292, 0.1254, 0.1251],
        [0.1300, 0.1182, 0.0969, 0.1211, 0.1424, 0.1360, 0.1269, 0.1286],
        [0.1337, 0.1122, 0.0955, 0.1171, 0.1339, 0.1270, 0.1416, 0.1390],
        [0.1210, 0.1431, 0.1238, 0.1177, 0.1159, 0.1334, 0.1246, 0.1205],
        [0.1239, 0.1392, 0.1276, 0.1224, 0.1238, 0.1263, 0.1233, 0.1137],
        [0.1260, 0.1189, 0.1023, 0.1262, 0.1308, 0.1358, 0.1217, 0.1382],
        [0.1314, 0.1158, 0.1002, 0.1223, 0.1356, 0.1254, 0.1345, 0.1348],
        [0.1317, 0.1174, 0.1022, 0.1245, 0.1267, 0.1352, 0.1325, 0.1298]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 11 [   0/390]  Loss: 0.6219 (0.622)  Acc@1: 78.1250 (78.1250)  Acc@5: 100.0000 (100.0000)LR: 2.225e-02
Train: 11 [  50/390]  Loss: 0.4470 (0.469)  Acc@1: 82.8125 (84.0380)  Acc@5: 98.4375 (99.3873)LR: 2.225e-02
Train: 11 [ 100/390]  Loss: 0.4689 (0.467)  Acc@1: 87.5000 (83.6479)  Acc@5: 98.4375 (99.2884)LR: 2.225e-02
Train: 11 [ 150/390]  Loss: 0.6097 (0.471)  Acc@1: 82.8125 (83.6300)  Acc@5: 100.0000 (99.3171)LR: 2.225e-02
Train: 11 [ 200/390]  Loss: 0.5423 (0.477)  Acc@1: 82.8125 (83.4810)  Acc@5: 98.4375 (99.2926)LR: 2.225e-02
Train: 11 [ 250/390]  Loss: 0.6159 (0.481)  Acc@1: 79.6875 (83.3914)  Acc@5: 100.0000 (99.2654)LR: 2.225e-02
Train: 11 [ 300/390]  Loss: 0.5864 (0.487)  Acc@1: 81.2500 (83.2693)  Acc@5: 98.4375 (99.2265)LR: 2.225e-02
Train: 11 [ 350/390]  Loss: 0.7533 (0.488)  Acc@1: 78.1250 (83.1953)  Acc@5: 100.0000 (99.2521)LR: 2.225e-02
Train: 11 [ 390/390]  Loss: 0.4771 (0.487)  Acc@1: 82.5000 (83.1800)  Acc@5: 97.5000 (99.2480)LR: 2.225e-02
train_acc 83.180000
Valid: 11 [   0/390]  Loss: 0.4730 (0.473)  Acc@1: 85.9375 (85.9375)  Acc@5: 98.4375 (98.4375)
Valid: 11 [  50/390]  Loss: 0.3028 (0.570)  Acc@1: 87.5000 (80.5453)  Acc@5: 100.0000 (98.9890)
Valid: 11 [ 100/390]  Loss: 0.4659 (0.556)  Acc@1: 84.3750 (80.8942)  Acc@5: 100.0000 (98.9790)
Valid: 11 [ 150/390]  Loss: 0.6816 (0.546)  Acc@1: 76.5625 (81.3224)  Acc@5: 100.0000 (99.0584)
Valid: 11 [ 200/390]  Loss: 0.5917 (0.546)  Acc@1: 84.3750 (81.3511)  Acc@5: 98.4375 (99.0283)
Valid: 11 [ 250/390]  Loss: 0.4672 (0.549)  Acc@1: 81.2500 (81.2998)  Acc@5: 100.0000 (98.9853)
Valid: 11 [ 300/390]  Loss: 0.5945 (0.551)  Acc@1: 78.1250 (81.2656)  Acc@5: 98.4375 (98.9929)
Valid: 11 [ 350/390]  Loss: 0.7519 (0.554)  Acc@1: 79.6875 (81.1832)  Acc@5: 98.4375 (98.9850)
Valid: 11 [ 390/390]  Loss: 0.7224 (0.552)  Acc@1: 72.5000 (81.2880)  Acc@5: 97.5000 (99.0000)
valid_acc 81.288000
epoch = 11   
 genotype = Genotype(normal=[('sep_conv_3x3', 1), ('sep_conv_5x5', 0), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('dil_conv_5x5', 4)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('dil_conv_3x3', 3), ('max_pool_3x3', 0), ('max_pool_3x3', 1)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1362, 0.1025, 0.0792, 0.1016, 0.1512, 0.1533, 0.1408, 0.1351],
        [0.1406, 0.0949, 0.0814, 0.0981, 0.1625, 0.1453, 0.1385, 0.1387],
        [0.1473, 0.1088, 0.0804, 0.1011, 0.1365, 0.1429, 0.1451, 0.1380],
        [0.1420, 0.0997, 0.0845, 0.1025, 0.1647, 0.1471, 0.1275, 0.1319],
        [0.1660, 0.0933, 0.0774, 0.1077, 0.1408, 0.1384, 0.1365, 0.1399],
        [0.1605, 0.1045, 0.0815, 0.0987, 0.1439, 0.1438, 0.1288, 0.1383],
        [0.1460, 0.0972, 0.0842, 0.0992, 0.1509, 0.1699, 0.1288, 0.1237],
        [0.1670, 0.0939, 0.0778, 0.1034, 0.1404, 0.1407, 0.1399, 0.1368],
        [0.1774, 0.0851, 0.0748, 0.0906, 0.1434, 0.1424, 0.1430, 0.1433],
        [0.1703, 0.1037, 0.0833, 0.1008, 0.1474, 0.1356, 0.1316, 0.1273],
        [0.1541, 0.0950, 0.0843, 0.1014, 0.1604, 0.1293, 0.1354, 0.1401],
        [0.1898, 0.0902, 0.0774, 0.1065, 0.1329, 0.1361, 0.1323, 0.1347],
        [0.1948, 0.0831, 0.0749, 0.0913, 0.1411, 0.1362, 0.1368, 0.1418],
        [0.1896, 0.0783, 0.0703, 0.0800, 0.1477, 0.1458, 0.1317, 0.1567]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1161, 0.1455, 0.1209, 0.1147, 0.1311, 0.1250, 0.1186, 0.1279],
        [0.1332, 0.1312, 0.1181, 0.1210, 0.1189, 0.1265, 0.1224, 0.1287],
        [0.1169, 0.1423, 0.1204, 0.1177, 0.1178, 0.1351, 0.1274, 0.1225],
        [0.1285, 0.1356, 0.1222, 0.1192, 0.1183, 0.1314, 0.1343, 0.1105],
        [0.1350, 0.1200, 0.0981, 0.1235, 0.1246, 0.1284, 0.1307, 0.1398],
        [0.1170, 0.1435, 0.1184, 0.1288, 0.1236, 0.1290, 0.1177, 0.1221],
        [0.1166, 0.1353, 0.1209, 0.1247, 0.1221, 0.1294, 0.1256, 0.1255],
        [0.1305, 0.1171, 0.0959, 0.1216, 0.1431, 0.1362, 0.1267, 0.1288],
        [0.1351, 0.1113, 0.0940, 0.1170, 0.1329, 0.1250, 0.1432, 0.1415],
        [0.1199, 0.1443, 0.1229, 0.1176, 0.1156, 0.1335, 0.1255, 0.1207],
        [0.1235, 0.1413, 0.1287, 0.1214, 0.1225, 0.1260, 0.1227, 0.1139],
        [0.1259, 0.1174, 0.1004, 0.1263, 0.1315, 0.1357, 0.1224, 0.1404],
        [0.1334, 0.1150, 0.0983, 0.1228, 0.1359, 0.1238, 0.1362, 0.1345],
        [0.1344, 0.1156, 0.1001, 0.1244, 0.1272, 0.1358, 0.1323, 0.1301]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 12 [   0/390]  Loss: 0.4793 (0.479)  Acc@1: 84.3750 (84.3750)  Acc@5: 98.4375 (98.4375)LR: 2.175e-02
Train: 12 [  50/390]  Loss: 0.4918 (0.462)  Acc@1: 81.2500 (83.7010)  Acc@5: 100.0000 (99.4485)LR: 2.175e-02
Train: 12 [ 100/390]  Loss: 0.2660 (0.441)  Acc@1: 89.0625 (84.6689)  Acc@5: 100.0000 (99.3812)LR: 2.175e-02
Train: 12 [ 150/390]  Loss: 0.2869 (0.451)  Acc@1: 92.1875 (84.4164)  Acc@5: 100.0000 (99.3584)LR: 2.175e-02
Train: 12 [ 200/390]  Loss: 0.5854 (0.453)  Acc@1: 79.6875 (84.3050)  Acc@5: 98.4375 (99.3237)LR: 2.175e-02
Train: 12 [ 250/390]  Loss: 0.2701 (0.457)  Acc@1: 92.1875 (84.2629)  Acc@5: 100.0000 (99.3152)LR: 2.175e-02
Train: 12 [ 300/390]  Loss: 0.6219 (0.459)  Acc@1: 76.5625 (84.1674)  Acc@5: 100.0000 (99.3200)LR: 2.175e-02
Train: 12 [ 350/390]  Loss: 0.4360 (0.462)  Acc@1: 87.5000 (84.1168)  Acc@5: 100.0000 (99.3234)LR: 2.175e-02
Train: 12 [ 390/390]  Loss: 0.6093 (0.463)  Acc@1: 82.5000 (84.1160)  Acc@5: 100.0000 (99.3360)LR: 2.175e-02
train_acc 84.116000
Valid: 12 [   0/390]  Loss: 0.2913 (0.291)  Acc@1: 87.5000 (87.5000)  Acc@5: 100.0000 (100.0000)
Valid: 12 [  50/390]  Loss: 0.5598 (0.548)  Acc@1: 84.3750 (82.0466)  Acc@5: 98.4375 (98.8664)
Valid: 12 [ 100/390]  Loss: 0.6779 (0.523)  Acc@1: 75.0000 (82.4103)  Acc@5: 100.0000 (98.9480)
Valid: 12 [ 150/390]  Loss: 0.6594 (0.538)  Acc@1: 79.6875 (81.7674)  Acc@5: 98.4375 (99.0480)
Valid: 12 [ 200/390]  Loss: 0.5643 (0.540)  Acc@1: 81.2500 (81.6154)  Acc@5: 96.8750 (99.0283)
Valid: 12 [ 250/390]  Loss: 0.4700 (0.539)  Acc@1: 85.9375 (81.7667)  Acc@5: 96.8750 (98.9915)
Valid: 12 [ 300/390]  Loss: 0.5862 (0.540)  Acc@1: 78.1250 (81.7120)  Acc@5: 96.8750 (98.9670)
Valid: 12 [ 350/390]  Loss: 0.6639 (0.538)  Acc@1: 79.6875 (81.7664)  Acc@5: 95.3125 (98.9361)
Valid: 12 [ 390/390]  Loss: 0.4584 (0.537)  Acc@1: 82.5000 (81.8600)  Acc@5: 100.0000 (98.9480)
valid_acc 81.860000
epoch = 12   
 genotype = Genotype(normal=[('sep_conv_3x3', 1), ('sep_conv_5x5', 0), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('dil_conv_3x3', 3), ('sep_conv_3x3', 1), ('dil_conv_5x5', 4)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('sep_conv_3x3', 2), ('max_pool_3x3', 0), ('max_pool_3x3', 1)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1373, 0.1005, 0.0770, 0.1006, 0.1538, 0.1555, 0.1404, 0.1348],
        [0.1425, 0.0920, 0.0786, 0.0959, 0.1660, 0.1455, 0.1404, 0.1392],
        [0.1496, 0.1078, 0.0785, 0.1004, 0.1368, 0.1434, 0.1444, 0.1390],
        [0.1452, 0.0968, 0.0818, 0.1003, 0.1668, 0.1485, 0.1272, 0.1335],
        [0.1703, 0.0909, 0.0749, 0.1068, 0.1404, 0.1384, 0.1393, 0.1391],
        [0.1644, 0.1022, 0.0793, 0.0977, 0.1448, 0.1455, 0.1288, 0.1373],
        [0.1488, 0.0943, 0.0817, 0.0975, 0.1531, 0.1721, 0.1287, 0.1238],
        [0.1710, 0.0905, 0.0749, 0.1016, 0.1433, 0.1408, 0.1406, 0.1373],
        [0.1841, 0.0812, 0.0713, 0.0876, 0.1451, 0.1408, 0.1456, 0.1443],
        [0.1753, 0.1019, 0.0809, 0.0993, 0.1480, 0.1358, 0.1319, 0.1268],
        [0.1569, 0.0918, 0.0810, 0.0985, 0.1645, 0.1283, 0.1377, 0.1414],
        [0.1979, 0.0873, 0.0741, 0.1048, 0.1321, 0.1353, 0.1329, 0.1355],
        [0.2046, 0.0795, 0.0713, 0.0883, 0.1412, 0.1363, 0.1366, 0.1421],
        [0.1977, 0.0752, 0.0669, 0.0770, 0.1470, 0.1447, 0.1318, 0.1596]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1152, 0.1467, 0.1210, 0.1138, 0.1308, 0.1270, 0.1186, 0.1268],
        [0.1336, 0.1309, 0.1174, 0.1221, 0.1179, 0.1266, 0.1221, 0.1293],
        [0.1163, 0.1440, 0.1210, 0.1162, 0.1175, 0.1364, 0.1279, 0.1206],
        [0.1283, 0.1352, 0.1211, 0.1184, 0.1185, 0.1319, 0.1362, 0.1105],
        [0.1341, 0.1189, 0.0966, 0.1232, 0.1241, 0.1291, 0.1328, 0.1412],
        [0.1159, 0.1448, 0.1186, 0.1293, 0.1225, 0.1291, 0.1182, 0.1217],
        [0.1170, 0.1350, 0.1207, 0.1256, 0.1215, 0.1273, 0.1262, 0.1266],
        [0.1310, 0.1162, 0.0952, 0.1228, 0.1444, 0.1364, 0.1258, 0.1280],
        [0.1356, 0.1103, 0.0935, 0.1182, 0.1318, 0.1237, 0.1443, 0.1428],
        [0.1191, 0.1455, 0.1231, 0.1173, 0.1142, 0.1349, 0.1243, 0.1215],
        [0.1233, 0.1419, 0.1292, 0.1212, 0.1226, 0.1257, 0.1222, 0.1138],
        [0.1264, 0.1162, 0.0991, 0.1277, 0.1320, 0.1358, 0.1212, 0.1416],
        [0.1338, 0.1136, 0.0974, 0.1244, 0.1349, 0.1240, 0.1372, 0.1346],
        [0.1375, 0.1135, 0.0981, 0.1243, 0.1271, 0.1360, 0.1331, 0.1303]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 13 [   0/390]  Loss: 0.4082 (0.408)  Acc@1: 89.0625 (89.0625)  Acc@5: 98.4375 (98.4375)LR: 2.121e-02
Train: 13 [  50/390]  Loss: 0.4320 (0.444)  Acc@1: 85.9375 (84.4363)  Acc@5: 100.0000 (99.5404)LR: 2.121e-02
Train: 13 [ 100/390]  Loss: 0.2344 (0.418)  Acc@1: 92.1875 (85.4579)  Acc@5: 100.0000 (99.4585)LR: 2.121e-02
Train: 13 [ 150/390]  Loss: 0.3818 (0.436)  Acc@1: 92.1875 (84.9441)  Acc@5: 98.4375 (99.4826)LR: 2.121e-02
Train: 13 [ 200/390]  Loss: 0.5399 (0.442)  Acc@1: 78.1250 (84.6859)  Acc@5: 100.0000 (99.4947)LR: 2.121e-02
Train: 13 [ 250/390]  Loss: 0.3909 (0.445)  Acc@1: 84.3750 (84.6178)  Acc@5: 100.0000 (99.4833)LR: 2.121e-02
Train: 13 [ 300/390]  Loss: 0.3882 (0.444)  Acc@1: 90.6250 (84.6553)  Acc@5: 98.4375 (99.4653)LR: 2.121e-02
Train: 13 [ 350/390]  Loss: 0.5457 (0.444)  Acc@1: 79.6875 (84.5664)  Acc@5: 98.4375 (99.4703)LR: 2.121e-02
Train: 13 [ 390/390]  Loss: 0.5237 (0.442)  Acc@1: 82.5000 (84.6440)  Acc@5: 100.0000 (99.4640)LR: 2.121e-02
train_acc 84.644000
Valid: 13 [   0/390]  Loss: 0.2402 (0.240)  Acc@1: 95.3125 (95.3125)  Acc@5: 100.0000 (100.0000)
Valid: 13 [  50/390]  Loss: 0.3974 (0.559)  Acc@1: 87.5000 (81.2806)  Acc@5: 100.0000 (98.8358)
Valid: 13 [ 100/390]  Loss: 0.5829 (0.571)  Acc@1: 84.3750 (81.1572)  Acc@5: 98.4375 (98.9171)
Valid: 13 [ 150/390]  Loss: 0.6330 (0.594)  Acc@1: 76.5625 (80.4739)  Acc@5: 98.4375 (98.9652)
Valid: 13 [ 200/390]  Loss: 1.132 (0.594)  Acc@1: 62.5000 (80.3327)  Acc@5: 98.4375 (99.0050)
Valid: 13 [ 250/390]  Loss: 0.3801 (0.598)  Acc@1: 84.3750 (80.1233)  Acc@5: 100.0000 (99.0040)
Valid: 13 [ 300/390]  Loss: 0.4698 (0.592)  Acc@1: 82.8125 (80.2793)  Acc@5: 98.4375 (98.9929)
Valid: 13 [ 350/390]  Loss: 0.5280 (0.591)  Acc@1: 82.8125 (80.1905)  Acc@5: 100.0000 (98.9939)
Valid: 13 [ 390/390]  Loss: 0.6491 (0.593)  Acc@1: 82.5000 (80.0960)  Acc@5: 100.0000 (98.9840)
valid_acc 80.096000
epoch = 13   
 genotype = Genotype(normal=[('sep_conv_3x3', 1), ('sep_conv_5x5', 0), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('dil_conv_3x3', 3), ('sep_conv_3x3', 1), ('dil_conv_5x5', 4)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_3x3', 2), ('dil_conv_5x5', 3), ('max_pool_3x3', 0), ('max_pool_3x3', 1)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1381, 0.0980, 0.0751, 0.0997, 0.1570, 0.1577, 0.1407, 0.1336],
        [0.1463, 0.0887, 0.0760, 0.0946, 0.1661, 0.1465, 0.1421, 0.1398],
        [0.1520, 0.1056, 0.0767, 0.0998, 0.1368, 0.1437, 0.1454, 0.1401],
        [0.1483, 0.0932, 0.0790, 0.0982, 0.1705, 0.1500, 0.1263, 0.1345],
        [0.1761, 0.0871, 0.0718, 0.1047, 0.1413, 0.1384, 0.1413, 0.1394],
        [0.1673, 0.0996, 0.0773, 0.0969, 0.1458, 0.1473, 0.1289, 0.1368],
        [0.1527, 0.0912, 0.0794, 0.0963, 0.1543, 0.1750, 0.1277, 0.1234],
        [0.1783, 0.0868, 0.0719, 0.1001, 0.1446, 0.1388, 0.1417, 0.1377],
        [0.1881, 0.0777, 0.0684, 0.0853, 0.1481, 0.1414, 0.1483, 0.1427],
        [0.1809, 0.0992, 0.0787, 0.0982, 0.1486, 0.1349, 0.1321, 0.1273],
        [0.1623, 0.0878, 0.0777, 0.0958, 0.1687, 0.1267, 0.1381, 0.1429],
        [0.2079, 0.0830, 0.0704, 0.1019, 0.1320, 0.1352, 0.1338, 0.1358],
        [0.2151, 0.0756, 0.0679, 0.0852, 0.1417, 0.1353, 0.1369, 0.1423],
        [0.2075, 0.0715, 0.0636, 0.0739, 0.1465, 0.1451, 0.1317, 0.1601]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1140, 0.1482, 0.1220, 0.1127, 0.1309, 0.1273, 0.1185, 0.1263],
        [0.1345, 0.1321, 0.1188, 0.1211, 0.1169, 0.1246, 0.1221, 0.1300],
        [0.1153, 0.1451, 0.1222, 0.1155, 0.1172, 0.1364, 0.1287, 0.1196],
        [0.1277, 0.1359, 0.1219, 0.1188, 0.1176, 0.1312, 0.1383, 0.1085],
        [0.1350, 0.1171, 0.0958, 0.1232, 0.1231, 0.1303, 0.1332, 0.1423],
        [0.1148, 0.1452, 0.1191, 0.1302, 0.1228, 0.1298, 0.1174, 0.1206],
        [0.1160, 0.1356, 0.1217, 0.1252, 0.1218, 0.1259, 0.1264, 0.1274],
        [0.1323, 0.1140, 0.0942, 0.1240, 0.1463, 0.1361, 0.1236, 0.1295],
        [0.1362, 0.1074, 0.0924, 0.1183, 0.1316, 0.1238, 0.1447, 0.1457],
        [0.1189, 0.1464, 0.1244, 0.1162, 0.1151, 0.1347, 0.1227, 0.1215],
        [0.1229, 0.1440, 0.1311, 0.1196, 0.1222, 0.1248, 0.1214, 0.1141],
        [0.1275, 0.1138, 0.0978, 0.1285, 0.1330, 0.1358, 0.1206, 0.1429],
        [0.1342, 0.1113, 0.0963, 0.1250, 0.1350, 0.1238, 0.1384, 0.1360],
        [0.1396, 0.1120, 0.0973, 0.1254, 0.1254, 0.1365, 0.1335, 0.1303]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 14 [   0/390]  Loss: 0.4157 (0.416)  Acc@1: 81.2500 (81.2500)  Acc@5: 98.4375 (98.4375)LR: 2.065e-02
Train: 14 [  50/390]  Loss: 0.2521 (0.408)  Acc@1: 92.1875 (86.2132)  Acc@5: 100.0000 (99.3260)LR: 2.065e-02
Train: 14 [ 100/390]  Loss: 0.3976 (0.404)  Acc@1: 90.6250 (86.3552)  Acc@5: 96.8750 (99.4740)LR: 2.065e-02
Train: 14 [ 150/390]  Loss: 0.4891 (0.413)  Acc@1: 84.3750 (85.8133)  Acc@5: 100.0000 (99.4412)LR: 2.065e-02
Train: 14 [ 200/390]  Loss: 0.3638 (0.410)  Acc@1: 89.0625 (85.8909)  Acc@5: 100.0000 (99.4636)LR: 2.065e-02
Train: 14 [ 250/390]  Loss: 0.4054 (0.416)  Acc@1: 89.0625 (85.7756)  Acc@5: 100.0000 (99.5207)LR: 2.065e-02
Train: 14 [ 300/390]  Loss: 0.6114 (0.421)  Acc@1: 71.8750 (85.5222)  Acc@5: 100.0000 (99.4965)LR: 2.065e-02
Train: 14 [ 350/390]  Loss: 0.5158 (0.421)  Acc@1: 78.1250 (85.6125)  Acc@5: 98.4375 (99.4480)LR: 2.065e-02
Train: 14 [ 390/390]  Loss: 0.4274 (0.424)  Acc@1: 85.0000 (85.4400)  Acc@5: 100.0000 (99.4160)LR: 2.065e-02
train_acc 85.440000
Valid: 14 [   0/390]  Loss: 0.4945 (0.495)  Acc@1: 79.6875 (79.6875)  Acc@5: 100.0000 (100.0000)
Valid: 14 [  50/390]  Loss: 0.5332 (0.493)  Acc@1: 76.5625 (82.3836)  Acc@5: 98.4375 (99.2341)
Valid: 14 [ 100/390]  Loss: 0.5331 (0.510)  Acc@1: 87.5000 (82.4257)  Acc@5: 98.4375 (99.0873)
Valid: 14 [ 150/390]  Loss: 0.5179 (0.511)  Acc@1: 84.3750 (82.5331)  Acc@5: 96.8750 (99.1204)
Valid: 14 [ 200/390]  Loss: 0.6004 (0.510)  Acc@1: 76.5625 (82.6881)  Acc@5: 100.0000 (99.1216)
Valid: 14 [ 250/390]  Loss: 0.4178 (0.507)  Acc@1: 85.9375 (82.6506)  Acc@5: 98.4375 (99.1658)
Valid: 14 [ 300/390]  Loss: 0.3282 (0.507)  Acc@1: 89.0625 (82.7087)  Acc@5: 100.0000 (99.1798)
Valid: 14 [ 350/390]  Loss: 0.4894 (0.506)  Acc@1: 85.9375 (82.7902)  Acc@5: 98.4375 (99.2032)
Valid: 14 [ 390/390]  Loss: 0.7791 (0.504)  Acc@1: 72.5000 (82.9040)  Acc@5: 100.0000 (99.1960)
valid_acc 82.904000
epoch = 14   
 genotype = Genotype(normal=[('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_5x5', 0), ('sep_conv_3x3', 1), ('dil_conv_5x5', 4)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_3x3', 2), ('max_pool_3x3', 0), ('max_pool_3x3', 0), ('max_pool_3x3', 1)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1387, 0.0956, 0.0730, 0.0986, 0.1602, 0.1584, 0.1419, 0.1335],
        [0.1480, 0.0856, 0.0737, 0.0927, 0.1695, 0.1480, 0.1432, 0.1392],
        [0.1546, 0.1041, 0.0748, 0.0994, 0.1371, 0.1431, 0.1453, 0.1416],
        [0.1503, 0.0901, 0.0768, 0.0965, 0.1717, 0.1528, 0.1268, 0.1350],
        [0.1811, 0.0847, 0.0695, 0.1038, 0.1420, 0.1389, 0.1412, 0.1388],
        [0.1703, 0.0971, 0.0754, 0.0960, 0.1468, 0.1496, 0.1288, 0.1362],
        [0.1549, 0.0888, 0.0781, 0.0960, 0.1555, 0.1768, 0.1264, 0.1235],
        [0.1831, 0.0840, 0.0698, 0.0995, 0.1440, 0.1387, 0.1428, 0.1381],
        [0.1947, 0.0745, 0.0657, 0.0833, 0.1493, 0.1408, 0.1489, 0.1429],
        [0.1866, 0.0965, 0.0762, 0.0967, 0.1497, 0.1348, 0.1325, 0.1269],
        [0.1671, 0.0851, 0.0757, 0.0946, 0.1715, 0.1262, 0.1366, 0.1431],
        [0.2165, 0.0803, 0.0680, 0.1008, 0.1309, 0.1350, 0.1336, 0.1349],
        [0.2258, 0.0724, 0.0652, 0.0830, 0.1407, 0.1337, 0.1367, 0.1425],
        [0.2160, 0.0687, 0.0612, 0.0718, 0.1466, 0.1440, 0.1305, 0.1613]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1126, 0.1491, 0.1219, 0.1126, 0.1315, 0.1282, 0.1182, 0.1259],
        [0.1352, 0.1314, 0.1178, 0.1210, 0.1173, 0.1239, 0.1228, 0.1308],
        [0.1144, 0.1466, 0.1224, 0.1161, 0.1162, 0.1362, 0.1295, 0.1186],
        [0.1277, 0.1367, 0.1221, 0.1200, 0.1161, 0.1306, 0.1387, 0.1080],
        [0.1345, 0.1176, 0.0952, 0.1240, 0.1232, 0.1291, 0.1337, 0.1427],
        [0.1141, 0.1469, 0.1196, 0.1297, 0.1220, 0.1292, 0.1181, 0.1203],
        [0.1165, 0.1371, 0.1229, 0.1264, 0.1201, 0.1239, 0.1266, 0.1265],
        [0.1315, 0.1142, 0.0939, 0.1253, 0.1487, 0.1342, 0.1239, 0.1282],
        [0.1374, 0.1067, 0.0922, 0.1201, 0.1305, 0.1231, 0.1450, 0.1450],
        [0.1177, 0.1475, 0.1250, 0.1146, 0.1170, 0.1349, 0.1214, 0.1220],
        [0.1230, 0.1450, 0.1319, 0.1186, 0.1214, 0.1257, 0.1195, 0.1149],
        [0.1286, 0.1141, 0.0980, 0.1308, 0.1333, 0.1345, 0.1194, 0.1413],
        [0.1346, 0.1100, 0.0958, 0.1263, 0.1347, 0.1235, 0.1396, 0.1356],
        [0.1413, 0.1101, 0.0961, 0.1257, 0.1245, 0.1366, 0.1346, 0.1311]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 15 [   0/390]  Loss: 0.3811 (0.381)  Acc@1: 85.9375 (85.9375)  Acc@5: 100.0000 (100.0000)LR: 2.005e-02
Train: 15 [  50/390]  Loss: 0.2943 (0.389)  Acc@1: 92.1875 (86.2745)  Acc@5: 100.0000 (99.5404)LR: 2.005e-02
Train: 15 [ 100/390]  Loss: 0.3398 (0.376)  Acc@1: 89.0625 (86.6027)  Acc@5: 100.0000 (99.5359)LR: 2.005e-02
Train: 15 [ 150/390]  Loss: 0.2409 (0.383)  Acc@1: 92.1875 (86.4756)  Acc@5: 100.0000 (99.5654)LR: 2.005e-02
Train: 15 [ 200/390]  Loss: 0.3068 (0.390)  Acc@1: 89.0625 (86.2873)  Acc@5: 100.0000 (99.5336)LR: 2.005e-02
Train: 15 [ 250/390]  Loss: 0.4534 (0.392)  Acc@1: 81.2500 (86.2425)  Acc@5: 98.4375 (99.5269)LR: 2.005e-02
Train: 15 [ 300/390]  Loss: 0.4210 (0.397)  Acc@1: 85.9375 (86.1763)  Acc@5: 100.0000 (99.4809)LR: 2.005e-02
Train: 15 [ 350/390]  Loss: 0.4893 (0.406)  Acc@1: 81.2500 (85.8974)  Acc@5: 100.0000 (99.4569)LR: 2.005e-02
Train: 15 [ 390/390]  Loss: 0.6048 (0.412)  Acc@1: 70.0000 (85.7360)  Acc@5: 100.0000 (99.4200)LR: 2.005e-02
train_acc 85.736000
Valid: 15 [   0/390]  Loss: 0.4632 (0.463)  Acc@1: 82.8125 (82.8125)  Acc@5: 100.0000 (100.0000)
Valid: 15 [  50/390]  Loss: 0.6428 (0.558)  Acc@1: 78.1250 (81.5257)  Acc@5: 98.4375 (98.9583)
Valid: 15 [ 100/390]  Loss: 0.2755 (0.547)  Acc@1: 89.0625 (81.6058)  Acc@5: 100.0000 (99.0254)
Valid: 15 [ 150/390]  Loss: 0.4688 (0.548)  Acc@1: 85.9375 (81.7881)  Acc@5: 98.4375 (99.0791)
Valid: 15 [ 200/390]  Loss: 0.4413 (0.545)  Acc@1: 85.9375 (81.7631)  Acc@5: 100.0000 (99.1060)
Valid: 15 [ 250/390]  Loss: 0.4601 (0.549)  Acc@1: 81.2500 (81.7916)  Acc@5: 100.0000 (99.0849)
Valid: 15 [ 300/390]  Loss: 0.4715 (0.555)  Acc@1: 82.8125 (81.6809)  Acc@5: 100.0000 (99.0864)
Valid: 15 [ 350/390]  Loss: 0.4764 (0.560)  Acc@1: 78.1250 (81.4770)  Acc@5: 100.0000 (99.0919)
Valid: 15 [ 390/390]  Loss: 0.3851 (0.561)  Acc@1: 87.5000 (81.4760)  Acc@5: 100.0000 (99.0920)
valid_acc 81.476000
epoch = 15   
 genotype = Genotype(normal=[('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_5x5', 0), ('sep_conv_5x5', 1), ('sep_conv_5x5', 0), ('sep_conv_3x3', 1), ('dil_conv_5x5', 4)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('sep_conv_3x3', 2), ('max_pool_3x3', 0), ('max_pool_3x3', 1)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1383, 0.0939, 0.0713, 0.0979, 0.1638, 0.1591, 0.1429, 0.1328],
        [0.1518, 0.0833, 0.0718, 0.0916, 0.1705, 0.1486, 0.1437, 0.1386],
        [0.1561, 0.1029, 0.0732, 0.0991, 0.1372, 0.1448, 0.1445, 0.1421],
        [0.1531, 0.0871, 0.0739, 0.0938, 0.1753, 0.1552, 0.1271, 0.1345],
        [0.1875, 0.0816, 0.0664, 0.1015, 0.1422, 0.1390, 0.1428, 0.1389],
        [0.1734, 0.0958, 0.0737, 0.0954, 0.1469, 0.1510, 0.1289, 0.1347],
        [0.1580, 0.0864, 0.0759, 0.0946, 0.1556, 0.1816, 0.1248, 0.1232],
        [0.1897, 0.0811, 0.0670, 0.0980, 0.1435, 0.1397, 0.1431, 0.1379],
        [0.2021, 0.0718, 0.0633, 0.0816, 0.1504, 0.1406, 0.1474, 0.1428],
        [0.1933, 0.0949, 0.0742, 0.0955, 0.1496, 0.1341, 0.1329, 0.1257],
        [0.1732, 0.0821, 0.0728, 0.0921, 0.1743, 0.1251, 0.1368, 0.1436],
        [0.2263, 0.0772, 0.0649, 0.0986, 0.1297, 0.1341, 0.1328, 0.1364],
        [0.2365, 0.0694, 0.0623, 0.0804, 0.1406, 0.1319, 0.1373, 0.1416],
        [0.2294, 0.0654, 0.0581, 0.0690, 0.1435, 0.1433, 0.1305, 0.1609]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1116, 0.1518, 0.1241, 0.1117, 0.1321, 0.1279, 0.1165, 0.1244],
        [0.1353, 0.1310, 0.1172, 0.1219, 0.1172, 0.1239, 0.1235, 0.1299],
        [0.1129, 0.1498, 0.1246, 0.1153, 0.1149, 0.1358, 0.1303, 0.1163],
        [0.1275, 0.1377, 0.1226, 0.1207, 0.1147, 0.1301, 0.1404, 0.1064],
        [0.1351, 0.1166, 0.0952, 0.1257, 0.1235, 0.1277, 0.1340, 0.1421],
        [0.1125, 0.1496, 0.1215, 0.1316, 0.1195, 0.1294, 0.1174, 0.1184],
        [0.1166, 0.1375, 0.1233, 0.1272, 0.1192, 0.1242, 0.1258, 0.1262],
        [0.1314, 0.1127, 0.0939, 0.1270, 0.1491, 0.1346, 0.1233, 0.1280],
        [0.1395, 0.1049, 0.0914, 0.1212, 0.1308, 0.1222, 0.1440, 0.1460],
        [0.1163, 0.1499, 0.1267, 0.1126, 0.1176, 0.1348, 0.1206, 0.1214],
        [0.1224, 0.1471, 0.1335, 0.1182, 0.1217, 0.1248, 0.1176, 0.1147],
        [0.1289, 0.1127, 0.0980, 0.1333, 0.1336, 0.1327, 0.1184, 0.1423],
        [0.1373, 0.1083, 0.0953, 0.1287, 0.1333, 0.1222, 0.1412, 0.1336],
        [0.1423, 0.1082, 0.0949, 0.1266, 0.1248, 0.1372, 0.1355, 0.1305]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 16 [   0/390]  Loss: 0.5307 (0.531)  Acc@1: 81.2500 (81.2500)  Acc@5: 100.0000 (100.0000)LR: 1.943e-02
Train: 16 [  50/390]  Loss: 0.3377 (0.393)  Acc@1: 87.5000 (85.6924)  Acc@5: 100.0000 (99.6324)LR: 1.943e-02
Train: 16 [ 100/390]  Loss: 0.4040 (0.377)  Acc@1: 85.9375 (86.3397)  Acc@5: 96.8750 (99.5823)LR: 1.943e-02
Train: 16 [ 150/390]  Loss: 0.4701 (0.385)  Acc@1: 84.3750 (86.2583)  Acc@5: 100.0000 (99.5137)LR: 1.943e-02
Train: 16 [ 200/390]  Loss: 0.3013 (0.391)  Acc@1: 90.6250 (86.0774)  Acc@5: 100.0000 (99.5103)LR: 1.943e-02
Train: 16 [ 250/390]  Loss: 0.5046 (0.393)  Acc@1: 87.5000 (86.1678)  Acc@5: 100.0000 (99.4958)LR: 1.943e-02
Train: 16 [ 300/390]  Loss: 0.7034 (0.392)  Acc@1: 81.2500 (86.3164)  Acc@5: 96.8750 (99.5017)LR: 1.943e-02
Train: 16 [ 350/390]  Loss: 0.3800 (0.391)  Acc@1: 82.8125 (86.4717)  Acc@5: 98.4375 (99.4925)LR: 1.943e-02
Train: 16 [ 390/390]  Loss: 0.2461 (0.392)  Acc@1: 95.0000 (86.4080)  Acc@5: 100.0000 (99.4800)LR: 1.943e-02
train_acc 86.408000
Valid: 16 [   0/390]  Loss: 0.5801 (0.580)  Acc@1: 82.8125 (82.8125)  Acc@5: 100.0000 (100.0000)
Valid: 16 [  50/390]  Loss: 0.4411 (0.501)  Acc@1: 87.5000 (83.2414)  Acc@5: 100.0000 (99.3566)
Valid: 16 [ 100/390]  Loss: 0.5660 (0.501)  Acc@1: 79.6875 (83.3849)  Acc@5: 100.0000 (99.2110)
Valid: 16 [ 150/390]  Loss: 0.4065 (0.501)  Acc@1: 89.0625 (83.1436)  Acc@5: 100.0000 (99.2239)
Valid: 16 [ 200/390]  Loss: 0.7516 (0.508)  Acc@1: 71.8750 (82.9524)  Acc@5: 96.8750 (99.1993)
Valid: 16 [ 250/390]  Loss: 0.2910 (0.505)  Acc@1: 90.6250 (83.1051)  Acc@5: 100.0000 (99.2032)
Valid: 16 [ 300/390]  Loss: 0.6901 (0.502)  Acc@1: 81.2500 (83.3576)  Acc@5: 98.4375 (99.1902)
Valid: 16 [ 350/390]  Loss: 0.5171 (0.506)  Acc@1: 84.3750 (83.2087)  Acc@5: 100.0000 (99.1631)
Valid: 16 [ 390/390]  Loss: 0.4926 (0.503)  Acc@1: 80.0000 (83.2560)  Acc@5: 100.0000 (99.1840)
valid_acc 83.256000
epoch = 16   
 genotype = Genotype(normal=[('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_5x5', 0), ('sep_conv_5x5', 1), ('sep_conv_5x5', 0), ('sep_conv_3x3', 1), ('dil_conv_5x5', 4)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_3x3', 2), ('max_pool_3x3', 0), ('max_pool_3x3', 0), ('max_pool_3x3', 1)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1381, 0.0911, 0.0689, 0.0957, 0.1685, 0.1608, 0.1443, 0.1326],
        [0.1553, 0.0801, 0.0692, 0.0895, 0.1719, 0.1496, 0.1446, 0.1399],
        [0.1584, 0.1005, 0.0710, 0.0976, 0.1382, 0.1456, 0.1454, 0.1432],
        [0.1584, 0.0846, 0.0720, 0.0926, 0.1759, 0.1540, 0.1282, 0.1342],
        [0.1911, 0.0796, 0.0641, 0.1001, 0.1431, 0.1387, 0.1436, 0.1398],
        [0.1782, 0.0940, 0.0721, 0.0949, 0.1462, 0.1511, 0.1281, 0.1352],
        [0.1623, 0.0828, 0.0732, 0.0924, 0.1581, 0.1832, 0.1239, 0.1241],
        [0.1947, 0.0787, 0.0644, 0.0967, 0.1437, 0.1394, 0.1433, 0.1389],
        [0.2071, 0.0687, 0.0605, 0.0791, 0.1511, 0.1404, 0.1491, 0.1440],
        [0.2007, 0.0923, 0.0717, 0.0936, 0.1503, 0.1344, 0.1312, 0.1257],
        [0.1792, 0.0791, 0.0701, 0.0900, 0.1759, 0.1248, 0.1370, 0.1439],
        [0.2340, 0.0750, 0.0626, 0.0969, 0.1288, 0.1341, 0.1330, 0.1357],
        [0.2484, 0.0666, 0.0598, 0.0784, 0.1383, 0.1294, 0.1371, 0.1419],
        [0.2392, 0.0628, 0.0554, 0.0665, 0.1422, 0.1418, 0.1301, 0.1619]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1114, 0.1524, 0.1237, 0.1101, 0.1337, 0.1282, 0.1157, 0.1248],
        [0.1355, 0.1318, 0.1178, 0.1230, 0.1167, 0.1241, 0.1227, 0.1284],
        [0.1125, 0.1500, 0.1240, 0.1157, 0.1152, 0.1356, 0.1304, 0.1166],
        [0.1275, 0.1394, 0.1241, 0.1204, 0.1124, 0.1289, 0.1419, 0.1053],
        [0.1347, 0.1150, 0.0938, 0.1250, 0.1236, 0.1286, 0.1348, 0.1445],
        [0.1126, 0.1489, 0.1198, 0.1315, 0.1203, 0.1309, 0.1177, 0.1183],
        [0.1159, 0.1384, 0.1238, 0.1268, 0.1183, 0.1245, 0.1257, 0.1267],
        [0.1317, 0.1104, 0.0920, 0.1260, 0.1502, 0.1368, 0.1238, 0.1291],
        [0.1411, 0.1032, 0.0896, 0.1212, 0.1314, 0.1214, 0.1450, 0.1472],
        [0.1168, 0.1491, 0.1250, 0.1130, 0.1183, 0.1355, 0.1208, 0.1215],
        [0.1215, 0.1491, 0.1344, 0.1170, 0.1218, 0.1246, 0.1167, 0.1149],
        [0.1295, 0.1113, 0.0965, 0.1339, 0.1338, 0.1317, 0.1196, 0.1438],
        [0.1365, 0.1066, 0.0936, 0.1289, 0.1340, 0.1224, 0.1435, 0.1345],
        [0.1432, 0.1069, 0.0941, 0.1277, 0.1241, 0.1366, 0.1364, 0.1310]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 17 [   0/390]  Loss: 0.2779 (0.278)  Acc@1: 89.0625 (89.0625)  Acc@5: 100.0000 (100.0000)LR: 1.878e-02
Train: 17 [  50/390]  Loss: 0.4079 (0.358)  Acc@1: 85.9375 (87.5306)  Acc@5: 96.8750 (99.6936)LR: 1.878e-02
Train: 17 [ 100/390]  Loss: 0.3192 (0.375)  Acc@1: 87.5000 (87.0514)  Acc@5: 100.0000 (99.5823)LR: 1.878e-02
Train: 17 [ 150/390]  Loss: 0.3553 (0.370)  Acc@1: 84.3750 (87.3241)  Acc@5: 100.0000 (99.5654)LR: 1.878e-02
Train: 17 [ 200/390]  Loss: 0.7172 (0.374)  Acc@1: 75.0000 (87.2046)  Acc@5: 95.3125 (99.5258)LR: 1.878e-02
Train: 17 [ 250/390]  Loss: 0.4822 (0.378)  Acc@1: 82.8125 (86.9771)  Acc@5: 100.0000 (99.5580)LR: 1.878e-02
Train: 17 [ 300/390]  Loss: 0.4502 (0.378)  Acc@1: 82.8125 (86.9705)  Acc@5: 100.0000 (99.5640)LR: 1.878e-02
Train: 17 [ 350/390]  Loss: 0.6405 (0.380)  Acc@1: 82.8125 (86.8812)  Acc@5: 98.4375 (99.5548)LR: 1.878e-02
Train: 17 [ 390/390]  Loss: 0.6792 (0.386)  Acc@1: 75.0000 (86.6840)  Acc@5: 100.0000 (99.5360)LR: 1.878e-02
train_acc 86.684000
Valid: 17 [   0/390]  Loss: 0.6587 (0.659)  Acc@1: 78.1250 (78.1250)  Acc@5: 98.4375 (98.4375)
Valid: 17 [  50/390]  Loss: 0.5688 (0.487)  Acc@1: 81.2500 (82.9350)  Acc@5: 100.0000 (99.0809)
Valid: 17 [ 100/390]  Loss: 0.3219 (0.477)  Acc@1: 87.5000 (83.6324)  Acc@5: 100.0000 (99.0408)
Valid: 17 [ 150/390]  Loss: 0.4275 (0.478)  Acc@1: 85.9375 (83.7438)  Acc@5: 100.0000 (99.0584)
Valid: 17 [ 200/390]  Loss: 0.3827 (0.479)  Acc@1: 84.3750 (83.7531)  Acc@5: 100.0000 (99.0672)
Valid: 17 [ 250/390]  Loss: 0.3822 (0.477)  Acc@1: 90.6250 (83.8521)  Acc@5: 100.0000 (99.1347)
Valid: 17 [ 300/390]  Loss: 0.5706 (0.476)  Acc@1: 84.3750 (83.8870)  Acc@5: 100.0000 (99.1331)
Valid: 17 [ 350/390]  Loss: 0.5250 (0.475)  Acc@1: 82.8125 (83.8586)  Acc@5: 100.0000 (99.1631)
Valid: 17 [ 390/390]  Loss: 0.6028 (0.477)  Acc@1: 80.0000 (83.7680)  Acc@5: 100.0000 (99.1960)
valid_acc 83.768000
epoch = 17   
 genotype = Genotype(normal=[('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_5x5', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 3), ('sep_conv_3x3', 1), ('dil_conv_5x5', 4)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('sep_conv_3x3', 2), ('max_pool_3x3', 0), ('max_pool_3x3', 1)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1368, 0.0890, 0.0674, 0.0953, 0.1726, 0.1604, 0.1459, 0.1326],
        [0.1588, 0.0773, 0.0669, 0.0877, 0.1734, 0.1494, 0.1460, 0.1405],
        [0.1608, 0.0991, 0.0700, 0.0986, 0.1378, 0.1462, 0.1457, 0.1418],
        [0.1630, 0.0821, 0.0699, 0.0913, 0.1759, 0.1543, 0.1294, 0.1341],
        [0.1965, 0.0771, 0.0621, 0.0996, 0.1418, 0.1392, 0.1433, 0.1405],
        [0.1816, 0.0922, 0.0709, 0.0957, 0.1469, 0.1502, 0.1280, 0.1345],
        [0.1660, 0.0799, 0.0712, 0.0912, 0.1584, 0.1849, 0.1235, 0.1249],
        [0.2002, 0.0759, 0.0623, 0.0960, 0.1441, 0.1391, 0.1425, 0.1399],
        [0.2152, 0.0654, 0.0580, 0.0769, 0.1506, 0.1404, 0.1489, 0.1447],
        [0.2066, 0.0906, 0.0702, 0.0933, 0.1502, 0.1327, 0.1315, 0.1249],
        [0.1848, 0.0764, 0.0678, 0.0887, 0.1784, 0.1234, 0.1376, 0.1429],
        [0.2427, 0.0732, 0.0609, 0.0968, 0.1273, 0.1314, 0.1331, 0.1346],
        [0.2643, 0.0636, 0.0571, 0.0763, 0.1346, 0.1270, 0.1354, 0.1417],
        [0.2554, 0.0601, 0.0531, 0.0645, 0.1403, 0.1387, 0.1280, 0.1597]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1102, 0.1548, 0.1239, 0.1098, 0.1333, 0.1271, 0.1165, 0.1244],
        [0.1359, 0.1327, 0.1182, 0.1238, 0.1148, 0.1233, 0.1220, 0.1294],
        [0.1105, 0.1516, 0.1244, 0.1160, 0.1156, 0.1347, 0.1314, 0.1159],
        [0.1281, 0.1405, 0.1251, 0.1204, 0.1117, 0.1287, 0.1420, 0.1035],
        [0.1350, 0.1137, 0.0929, 0.1257, 0.1241, 0.1269, 0.1356, 0.1461],
        [0.1104, 0.1515, 0.1201, 0.1311, 0.1194, 0.1309, 0.1188, 0.1177],
        [0.1165, 0.1397, 0.1243, 0.1262, 0.1183, 0.1231, 0.1243, 0.1277],
        [0.1323, 0.1097, 0.0913, 0.1274, 0.1504, 0.1368, 0.1235, 0.1286],
        [0.1428, 0.1031, 0.0892, 0.1233, 0.1295, 0.1203, 0.1449, 0.1469],
        [0.1150, 0.1517, 0.1260, 0.1126, 0.1176, 0.1347, 0.1208, 0.1216],
        [0.1213, 0.1501, 0.1350, 0.1169, 0.1215, 0.1239, 0.1165, 0.1148],
        [0.1301, 0.1101, 0.0958, 0.1354, 0.1345, 0.1310, 0.1193, 0.1438],
        [0.1378, 0.1063, 0.0934, 0.1316, 0.1330, 0.1211, 0.1449, 0.1319],
        [0.1459, 0.1052, 0.0936, 0.1295, 0.1233, 0.1357, 0.1353, 0.1315]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 18 [   0/390]  Loss: 0.3216 (0.322)  Acc@1: 85.9375 (85.9375)  Acc@5: 100.0000 (100.0000)LR: 1.811e-02
Train: 18 [  50/390]  Loss: 0.3839 (0.357)  Acc@1: 87.5000 (87.7757)  Acc@5: 100.0000 (99.6936)LR: 1.811e-02
Train: 18 [ 100/390]  Loss: 0.2321 (0.364)  Acc@1: 92.1875 (87.4845)  Acc@5: 100.0000 (99.6442)LR: 1.811e-02
Train: 18 [ 150/390]  Loss: 0.3978 (0.368)  Acc@1: 87.5000 (87.1896)  Acc@5: 100.0000 (99.6585)LR: 1.811e-02
Train: 18 [ 200/390]  Loss: 0.3215 (0.366)  Acc@1: 90.6250 (87.1735)  Acc@5: 100.0000 (99.6657)LR: 1.811e-02
Train: 18 [ 250/390]  Loss: 0.3774 (0.368)  Acc@1: 92.1875 (87.2323)  Acc@5: 98.4375 (99.6203)LR: 1.811e-02
Train: 18 [ 300/390]  Loss: 0.4695 (0.371)  Acc@1: 87.5000 (87.0536)  Acc@5: 100.0000 (99.6107)LR: 1.811e-02
Train: 18 [ 350/390]  Loss: 0.3385 (0.375)  Acc@1: 87.5000 (86.9124)  Acc@5: 100.0000 (99.5816)LR: 1.811e-02
Train: 18 [ 390/390]  Loss: 0.4869 (0.375)  Acc@1: 85.0000 (86.9440)  Acc@5: 100.0000 (99.5840)LR: 1.811e-02
train_acc 86.944000
Valid: 18 [   0/390]  Loss: 0.5491 (0.549)  Acc@1: 75.0000 (75.0000)  Acc@5: 100.0000 (100.0000)
Valid: 18 [  50/390]  Loss: 0.6449 (0.471)  Acc@1: 79.6875 (83.5172)  Acc@5: 98.4375 (99.2034)
Valid: 18 [ 100/390]  Loss: 0.4964 (0.459)  Acc@1: 87.5000 (84.0501)  Acc@5: 100.0000 (99.3657)
Valid: 18 [ 150/390]  Loss: 0.4807 (0.456)  Acc@1: 85.9375 (84.4060)  Acc@5: 96.8750 (99.2964)
Valid: 18 [ 200/390]  Loss: 0.4474 (0.455)  Acc@1: 87.5000 (84.5460)  Acc@5: 98.4375 (99.2771)
Valid: 18 [ 250/390]  Loss: 0.4063 (0.461)  Acc@1: 85.9375 (84.3439)  Acc@5: 100.0000 (99.2592)
Valid: 18 [ 300/390]  Loss: 0.9155 (0.467)  Acc@1: 71.8750 (84.0012)  Acc@5: 96.8750 (99.2733)
Valid: 18 [ 350/390]  Loss: 0.3803 (0.467)  Acc@1: 84.3750 (84.0367)  Acc@5: 100.0000 (99.2744)
Valid: 18 [ 390/390]  Loss: 0.5063 (0.466)  Acc@1: 82.5000 (84.0640)  Acc@5: 100.0000 (99.2880)
valid_acc 84.064000
epoch = 18   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_5x5', 0), ('sep_conv_3x3', 1), ('dil_conv_5x5', 4)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('sep_conv_3x3', 2), ('max_pool_3x3', 0), ('max_pool_3x3', 1)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1367, 0.0868, 0.0660, 0.0946, 0.1752, 0.1617, 0.1467, 0.1323],
        [0.1619, 0.0743, 0.0645, 0.0855, 0.1748, 0.1511, 0.1460, 0.1420],
        [0.1632, 0.0968, 0.0686, 0.0984, 0.1392, 0.1450, 0.1465, 0.1423],
        [0.1666, 0.0793, 0.0680, 0.0898, 0.1775, 0.1538, 0.1297, 0.1354],
        [0.2027, 0.0746, 0.0598, 0.0984, 0.1398, 0.1398, 0.1440, 0.1410],
        [0.1857, 0.0903, 0.0697, 0.0954, 0.1476, 0.1506, 0.1277, 0.1331],
        [0.1701, 0.0773, 0.0694, 0.0897, 0.1599, 0.1864, 0.1224, 0.1248],
        [0.2054, 0.0739, 0.0605, 0.0954, 0.1444, 0.1382, 0.1429, 0.1393],
        [0.2214, 0.0631, 0.0561, 0.0754, 0.1490, 0.1407, 0.1492, 0.1452],
        [0.2123, 0.0888, 0.0690, 0.0931, 0.1499, 0.1319, 0.1302, 0.1248],
        [0.1909, 0.0734, 0.0653, 0.0864, 0.1819, 0.1234, 0.1367, 0.1420],
        [0.2520, 0.0711, 0.0591, 0.0965, 0.1256, 0.1291, 0.1329, 0.1337],
        [0.2770, 0.0610, 0.0551, 0.0746, 0.1326, 0.1254, 0.1348, 0.1396],
        [0.2679, 0.0573, 0.0508, 0.0623, 0.1394, 0.1342, 0.1279, 0.1603]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1088, 0.1562, 0.1249, 0.1100, 0.1344, 0.1260, 0.1168, 0.1229],
        [0.1367, 0.1327, 0.1185, 0.1241, 0.1135, 0.1229, 0.1222, 0.1292],
        [0.1099, 0.1517, 0.1250, 0.1148, 0.1155, 0.1359, 0.1323, 0.1148],
        [0.1278, 0.1415, 0.1266, 0.1205, 0.1107, 0.1283, 0.1417, 0.1029],
        [0.1347, 0.1132, 0.0929, 0.1267, 0.1241, 0.1267, 0.1356, 0.1463],
        [0.1099, 0.1522, 0.1208, 0.1315, 0.1200, 0.1309, 0.1188, 0.1160],
        [0.1163, 0.1406, 0.1260, 0.1270, 0.1178, 0.1211, 0.1233, 0.1279],
        [0.1330, 0.1092, 0.0919, 0.1297, 0.1485, 0.1353, 0.1235, 0.1289],
        [0.1426, 0.1020, 0.0895, 0.1246, 0.1299, 0.1203, 0.1434, 0.1478],
        [0.1138, 0.1514, 0.1261, 0.1122, 0.1179, 0.1357, 0.1214, 0.1215],
        [0.1213, 0.1512, 0.1371, 0.1148, 0.1212, 0.1234, 0.1167, 0.1141],
        [0.1310, 0.1095, 0.0964, 0.1377, 0.1341, 0.1302, 0.1180, 0.1431],
        [0.1384, 0.1053, 0.0936, 0.1334, 0.1323, 0.1202, 0.1444, 0.1326],
        [0.1479, 0.1035, 0.0932, 0.1304, 0.1238, 0.1353, 0.1341, 0.1319]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 19 [   0/390]  Loss: 0.3183 (0.318)  Acc@1: 92.1875 (92.1875)  Acc@5: 100.0000 (100.0000)LR: 1.742e-02
Train: 19 [  50/390]  Loss: 0.3460 (0.361)  Acc@1: 84.3750 (87.1630)  Acc@5: 100.0000 (99.6630)LR: 1.742e-02
Train: 19 [ 100/390]  Loss: 0.4004 (0.361)  Acc@1: 87.5000 (87.0978)  Acc@5: 100.0000 (99.7215)LR: 1.742e-02
Train: 19 [ 150/390]  Loss: 0.4433 (0.366)  Acc@1: 84.3750 (87.0240)  Acc@5: 100.0000 (99.7103)LR: 1.742e-02
Train: 19 [ 200/390]  Loss: 0.2642 (0.361)  Acc@1: 93.7500 (86.9947)  Acc@5: 100.0000 (99.6968)LR: 1.742e-02
Train: 19 [ 250/390]  Loss: 0.2159 (0.360)  Acc@1: 95.3125 (87.1389)  Acc@5: 100.0000 (99.6327)LR: 1.742e-02
Train: 19 [ 300/390]  Loss: 0.2678 (0.358)  Acc@1: 89.0625 (87.2301)  Acc@5: 100.0000 (99.6211)LR: 1.742e-02
Train: 19 [ 350/390]  Loss: 0.5439 (0.356)  Acc@1: 79.6875 (87.3531)  Acc@5: 100.0000 (99.6439)LR: 1.742e-02
Train: 19 [ 390/390]  Loss: 0.4774 (0.359)  Acc@1: 77.5000 (87.3640)  Acc@5: 100.0000 (99.6080)LR: 1.742e-02
train_acc 87.364000
Valid: 19 [   0/390]  Loss: 0.4729 (0.473)  Acc@1: 84.3750 (84.3750)  Acc@5: 98.4375 (98.4375)
Valid: 19 [  50/390]  Loss: 0.6088 (0.509)  Acc@1: 79.6875 (83.4865)  Acc@5: 98.4375 (99.1728)
Valid: 19 [ 100/390]  Loss: 0.4844 (0.502)  Acc@1: 84.3750 (83.5087)  Acc@5: 100.0000 (99.1955)
Valid: 19 [ 150/390]  Loss: 0.5955 (0.502)  Acc@1: 81.2500 (83.5782)  Acc@5: 100.0000 (99.0998)
Valid: 19 [ 200/390]  Loss: 0.6960 (0.492)  Acc@1: 78.1250 (83.8386)  Acc@5: 100.0000 (99.1449)
Valid: 19 [ 250/390]  Loss: 0.5682 (0.487)  Acc@1: 73.4375 (83.9081)  Acc@5: 100.0000 (99.1347)
Valid: 19 [ 300/390]  Loss: 0.5310 (0.486)  Acc@1: 81.2500 (83.9234)  Acc@5: 98.4375 (99.1487)
Valid: 19 [ 350/390]  Loss: 0.4880 (0.487)  Acc@1: 81.2500 (83.8453)  Acc@5: 100.0000 (99.1720)
Valid: 19 [ 390/390]  Loss: 0.4997 (0.481)  Acc@1: 80.0000 (83.9840)  Acc@5: 100.0000 (99.2120)
valid_acc 83.984000
epoch = 19   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('dil_conv_3x3', 3), ('sep_conv_3x3', 1), ('dil_conv_5x5', 4)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('dil_conv_5x5', 3), ('max_pool_3x3', 1), ('max_pool_3x3', 0)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1373, 0.0847, 0.0645, 0.0934, 0.1775, 0.1625, 0.1481, 0.1320],
        [0.1639, 0.0718, 0.0625, 0.0839, 0.1768, 0.1517, 0.1469, 0.1425],
        [0.1662, 0.0947, 0.0669, 0.0974, 0.1378, 0.1463, 0.1483, 0.1423],
        [0.1697, 0.0762, 0.0655, 0.0876, 0.1801, 0.1547, 0.1300, 0.1362],
        [0.2081, 0.0719, 0.0575, 0.0969, 0.1403, 0.1402, 0.1439, 0.1411],
        [0.1886, 0.0884, 0.0686, 0.0951, 0.1481, 0.1487, 0.1282, 0.1342],
        [0.1744, 0.0745, 0.0672, 0.0879, 0.1609, 0.1888, 0.1214, 0.1249],
        [0.2104, 0.0714, 0.0588, 0.0948, 0.1451, 0.1384, 0.1425, 0.1385],
        [0.2288, 0.0609, 0.0544, 0.0742, 0.1487, 0.1385, 0.1498, 0.1447],
        [0.2200, 0.0870, 0.0678, 0.0930, 0.1490, 0.1311, 0.1286, 0.1235],
        [0.1977, 0.0707, 0.0632, 0.0847, 0.1822, 0.1235, 0.1374, 0.1407],
        [0.2599, 0.0685, 0.0572, 0.0954, 0.1251, 0.1278, 0.1329, 0.1333],
        [0.2899, 0.0582, 0.0529, 0.0725, 0.1302, 0.1229, 0.1337, 0.1397],
        [0.2804, 0.0545, 0.0485, 0.0601, 0.1382, 0.1312, 0.1269, 0.1602]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1080, 0.1571, 0.1255, 0.1096, 0.1338, 0.1269, 0.1178, 0.1214],
        [0.1372, 0.1336, 0.1193, 0.1227, 0.1130, 0.1225, 0.1227, 0.1290],
        [0.1094, 0.1518, 0.1251, 0.1144, 0.1151, 0.1355, 0.1337, 0.1151],
        [0.1274, 0.1428, 0.1279, 0.1209, 0.1091, 0.1276, 0.1423, 0.1021],
        [0.1347, 0.1118, 0.0917, 0.1264, 0.1247, 0.1270, 0.1375, 0.1463],
        [0.1091, 0.1534, 0.1214, 0.1329, 0.1192, 0.1305, 0.1189, 0.1146],
        [0.1159, 0.1418, 0.1272, 0.1275, 0.1163, 0.1197, 0.1233, 0.1284],
        [0.1324, 0.1085, 0.0916, 0.1303, 0.1487, 0.1357, 0.1245, 0.1284],
        [0.1429, 0.1009, 0.0889, 0.1250, 0.1295, 0.1201, 0.1437, 0.1491],
        [0.1140, 0.1518, 0.1264, 0.1117, 0.1165, 0.1359, 0.1218, 0.1219],
        [0.1204, 0.1520, 0.1382, 0.1140, 0.1217, 0.1236, 0.1172, 0.1129],
        [0.1326, 0.1079, 0.0957, 0.1391, 0.1345, 0.1292, 0.1178, 0.1432],
        [0.1397, 0.1039, 0.0930, 0.1341, 0.1313, 0.1213, 0.1454, 0.1312],
        [0.1503, 0.1018, 0.0926, 0.1310, 0.1238, 0.1359, 0.1340, 0.1307]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 20 [   0/390]  Loss: 0.3526 (0.353)  Acc@1: 87.5000 (87.5000)  Acc@5: 100.0000 (100.0000)LR: 1.671e-02
Train: 20 [  50/390]  Loss: 0.3912 (0.303)  Acc@1: 84.3750 (89.6446)  Acc@5: 100.0000 (99.6630)LR: 1.671e-02
Train: 20 [ 100/390]  Loss: 0.4677 (0.324)  Acc@1: 84.3750 (88.4282)  Acc@5: 98.4375 (99.7525)LR: 1.671e-02
Train: 20 [ 150/390]  Loss: 0.3389 (0.319)  Acc@1: 90.6250 (88.5969)  Acc@5: 100.0000 (99.7103)LR: 1.671e-02
Train: 20 [ 200/390]  Loss: 0.3222 (0.324)  Acc@1: 89.0625 (88.4873)  Acc@5: 100.0000 (99.6735)LR: 1.671e-02
Train: 20 [ 250/390]  Loss: 0.4202 (0.326)  Acc@1: 84.3750 (88.4587)  Acc@5: 100.0000 (99.6701)LR: 1.671e-02
Train: 20 [ 300/390]  Loss: 0.3262 (0.333)  Acc@1: 87.5000 (88.2735)  Acc@5: 100.0000 (99.6470)LR: 1.671e-02
Train: 20 [ 350/390]  Loss: 0.3849 (0.337)  Acc@1: 87.5000 (88.1855)  Acc@5: 98.4375 (99.6216)LR: 1.671e-02
Train: 20 [ 390/390]  Loss: 0.2129 (0.338)  Acc@1: 90.0000 (88.1560)  Acc@5: 100.0000 (99.6120)LR: 1.671e-02
train_acc 88.156000
Valid: 20 [   0/390]  Loss: 0.7340 (0.734)  Acc@1: 78.1250 (78.1250)  Acc@5: 100.0000 (100.0000)
Valid: 20 [  50/390]  Loss: 0.3850 (0.471)  Acc@1: 87.5000 (83.8235)  Acc@5: 100.0000 (99.2341)
Valid: 20 [ 100/390]  Loss: 0.4594 (0.463)  Acc@1: 85.9375 (84.2976)  Acc@5: 100.0000 (99.2574)
Valid: 20 [ 150/390]  Loss: 0.4264 (0.456)  Acc@1: 85.9375 (84.6854)  Acc@5: 100.0000 (99.3688)
Valid: 20 [ 200/390]  Loss: 0.4539 (0.461)  Acc@1: 85.9375 (84.6004)  Acc@5: 100.0000 (99.3781)
Valid: 20 [ 250/390]  Loss: 0.4159 (0.464)  Acc@1: 84.3750 (84.4248)  Acc@5: 100.0000 (99.3588)
Valid: 20 [ 300/390]  Loss: 0.7012 (0.464)  Acc@1: 78.1250 (84.4684)  Acc@5: 98.4375 (99.3667)
Valid: 20 [ 350/390]  Loss: 0.4452 (0.466)  Acc@1: 84.3750 (84.4373)  Acc@5: 100.0000 (99.3323)
Valid: 20 [ 390/390]  Loss: 0.4757 (0.464)  Acc@1: 80.0000 (84.4360)  Acc@5: 100.0000 (99.3000)
valid_acc 84.436000
epoch = 20   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 3), ('sep_conv_3x3', 1), ('dil_conv_5x5', 4)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('dil_conv_5x5', 3), ('max_pool_3x3', 1), ('max_pool_3x3', 0)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1385, 0.0815, 0.0621, 0.0915, 0.1813, 0.1647, 0.1491, 0.1313],
        [0.1658, 0.0691, 0.0601, 0.0818, 0.1799, 0.1524, 0.1479, 0.1429],
        [0.1718, 0.0926, 0.0652, 0.0970, 0.1378, 0.1456, 0.1489, 0.1410],
        [0.1737, 0.0741, 0.0636, 0.0865, 0.1809, 0.1546, 0.1303, 0.1363],
        [0.2122, 0.0695, 0.0554, 0.0953, 0.1394, 0.1423, 0.1452, 0.1407],
        [0.1938, 0.0861, 0.0668, 0.0945, 0.1486, 0.1499, 0.1272, 0.1331],
        [0.1780, 0.0722, 0.0652, 0.0862, 0.1610, 0.1930, 0.1204, 0.1241],
        [0.2178, 0.0690, 0.0567, 0.0935, 0.1443, 0.1397, 0.1405, 0.1386],
        [0.2357, 0.0581, 0.0519, 0.0717, 0.1499, 0.1382, 0.1495, 0.1449],
        [0.2303, 0.0844, 0.0655, 0.0915, 0.1468, 0.1301, 0.1281, 0.1234],
        [0.2056, 0.0683, 0.0612, 0.0833, 0.1831, 0.1213, 0.1363, 0.1409],
        [0.2696, 0.0659, 0.0551, 0.0939, 0.1248, 0.1256, 0.1323, 0.1328],
        [0.3053, 0.0552, 0.0501, 0.0694, 0.1278, 0.1208, 0.1322, 0.1391],
        [0.2949, 0.0519, 0.0462, 0.0579, 0.1363, 0.1293, 0.1246, 0.1590]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1068, 0.1588, 0.1262, 0.1097, 0.1324, 0.1272, 0.1188, 0.1201],
        [0.1381, 0.1336, 0.1192, 0.1221, 0.1128, 0.1226, 0.1223, 0.1294],
        [0.1089, 0.1523, 0.1250, 0.1138, 0.1141, 0.1376, 0.1336, 0.1147],
        [0.1272, 0.1429, 0.1282, 0.1219, 0.1083, 0.1275, 0.1431, 0.1010],
        [0.1345, 0.1109, 0.0907, 0.1261, 0.1253, 0.1252, 0.1385, 0.1489],
        [0.1086, 0.1538, 0.1210, 0.1344, 0.1188, 0.1297, 0.1192, 0.1145],
        [0.1161, 0.1424, 0.1282, 0.1284, 0.1151, 0.1183, 0.1227, 0.1287],
        [0.1324, 0.1078, 0.0916, 0.1309, 0.1489, 0.1352, 0.1242, 0.1290],
        [0.1426, 0.0999, 0.0887, 0.1252, 0.1299, 0.1187, 0.1450, 0.1500],
        [0.1135, 0.1523, 0.1260, 0.1118, 0.1172, 0.1359, 0.1218, 0.1215],
        [0.1199, 0.1529, 0.1390, 0.1138, 0.1219, 0.1240, 0.1161, 0.1123],
        [0.1343, 0.1069, 0.0951, 0.1401, 0.1347, 0.1279, 0.1171, 0.1439],
        [0.1413, 0.1025, 0.0924, 0.1356, 0.1298, 0.1208, 0.1470, 0.1306],
        [0.1511, 0.1003, 0.0914, 0.1316, 0.1239, 0.1356, 0.1347, 0.1314]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 21 [   0/390]  Loss: 0.3952 (0.395)  Acc@1: 84.3750 (84.3750)  Acc@5: 100.0000 (100.0000)LR: 1.598e-02
Train: 21 [  50/390]  Loss: 0.3508 (0.331)  Acc@1: 90.6250 (88.4804)  Acc@5: 100.0000 (99.5404)LR: 1.598e-02
Train: 21 [ 100/390]  Loss: 0.4943 (0.314)  Acc@1: 82.8125 (89.2017)  Acc@5: 100.0000 (99.6287)LR: 1.598e-02
Train: 21 [ 150/390]  Loss: 0.1384 (0.322)  Acc@1: 95.3125 (88.8659)  Acc@5: 100.0000 (99.6585)LR: 1.598e-02
Train: 21 [ 200/390]  Loss: 0.2793 (0.321)  Acc@1: 90.6250 (88.8604)  Acc@5: 100.0000 (99.6968)LR: 1.598e-02
Train: 21 [ 250/390]  Loss: 0.3635 (0.327)  Acc@1: 89.0625 (88.6765)  Acc@5: 100.0000 (99.6763)LR: 1.598e-02
Train: 21 [ 300/390]  Loss: 0.1699 (0.326)  Acc@1: 96.8750 (88.6109)  Acc@5: 100.0000 (99.6833)LR: 1.598e-02
Train: 21 [ 350/390]  Loss: 0.2525 (0.331)  Acc@1: 92.1875 (88.4882)  Acc@5: 100.0000 (99.6528)LR: 1.598e-02
Train: 21 [ 390/390]  Loss: 0.2779 (0.332)  Acc@1: 92.5000 (88.4160)  Acc@5: 100.0000 (99.6360)LR: 1.598e-02
train_acc 88.416000
Valid: 21 [   0/390]  Loss: 0.4371 (0.437)  Acc@1: 87.5000 (87.5000)  Acc@5: 100.0000 (100.0000)
Valid: 21 [  50/390]  Loss: 0.7792 (0.481)  Acc@1: 79.6875 (83.9154)  Acc@5: 96.8750 (99.1728)
Valid: 21 [ 100/390]  Loss: 0.5795 (0.478)  Acc@1: 79.6875 (84.1894)  Acc@5: 100.0000 (99.1955)
Valid: 21 [ 150/390]  Loss: 0.6909 (0.472)  Acc@1: 81.2500 (84.4681)  Acc@5: 96.8750 (99.2032)
Valid: 21 [ 200/390]  Loss: 0.5816 (0.471)  Acc@1: 78.1250 (84.2973)  Acc@5: 100.0000 (99.2460)
Valid: 21 [ 250/390]  Loss: 0.5975 (0.466)  Acc@1: 76.5625 (84.4124)  Acc@5: 96.8750 (99.3090)
Valid: 21 [ 300/390]  Loss: 0.5537 (0.468)  Acc@1: 79.6875 (84.3231)  Acc@5: 96.8750 (99.2992)
Valid: 21 [ 350/390]  Loss: 0.5665 (0.470)  Acc@1: 82.8125 (84.2637)  Acc@5: 100.0000 (99.2699)
Valid: 21 [ 390/390]  Loss: 0.6099 (0.473)  Acc@1: 82.5000 (84.1880)  Acc@5: 92.5000 (99.2560)
valid_acc 84.188000
epoch = 21   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 3), ('sep_conv_3x3', 1), ('dil_conv_5x5', 4)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('dil_conv_5x5', 3), ('max_pool_3x3', 1), ('max_pool_3x3', 0)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1375, 0.0790, 0.0606, 0.0900, 0.1854, 0.1671, 0.1492, 0.1311],
        [0.1696, 0.0666, 0.0585, 0.0803, 0.1803, 0.1517, 0.1487, 0.1443],
        [0.1751, 0.0906, 0.0641, 0.0970, 0.1390, 0.1452, 0.1494, 0.1396],
        [0.1782, 0.0718, 0.0621, 0.0854, 0.1804, 0.1546, 0.1302, 0.1372],
        [0.2166, 0.0674, 0.0539, 0.0944, 0.1398, 0.1431, 0.1454, 0.1394],
        [0.1985, 0.0842, 0.0656, 0.0939, 0.1488, 0.1494, 0.1276, 0.1321],
        [0.1825, 0.0703, 0.0640, 0.0855, 0.1603, 0.1943, 0.1197, 0.1235],
        [0.2244, 0.0672, 0.0556, 0.0935, 0.1439, 0.1385, 0.1392, 0.1377],
        [0.2421, 0.0561, 0.0503, 0.0701, 0.1501, 0.1372, 0.1493, 0.1448],
        [0.2372, 0.0824, 0.0643, 0.0910, 0.1462, 0.1281, 0.1276, 0.1231],
        [0.2136, 0.0665, 0.0600, 0.0827, 0.1833, 0.1183, 0.1356, 0.1400],
        [0.2783, 0.0639, 0.0537, 0.0931, 0.1239, 0.1219, 0.1314, 0.1338],
        [0.3181, 0.0533, 0.0484, 0.0677, 0.1266, 0.1188, 0.1302, 0.1369],
        [0.3095, 0.0496, 0.0443, 0.0560, 0.1330, 0.1276, 0.1232, 0.1568]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1052, 0.1598, 0.1266, 0.1096, 0.1338, 0.1275, 0.1185, 0.1189],
        [0.1396, 0.1340, 0.1199, 0.1212, 0.1126, 0.1223, 0.1225, 0.1280],
        [0.1073, 0.1520, 0.1249, 0.1142, 0.1142, 0.1387, 0.1335, 0.1150],
        [0.1285, 0.1421, 0.1283, 0.1225, 0.1069, 0.1265, 0.1440, 0.1012],
        [0.1351, 0.1091, 0.0896, 0.1258, 0.1264, 0.1257, 0.1381, 0.1502],
        [0.1073, 0.1539, 0.1209, 0.1360, 0.1192, 0.1297, 0.1196, 0.1134],
        [0.1169, 0.1425, 0.1289, 0.1290, 0.1141, 0.1174, 0.1225, 0.1287],
        [0.1316, 0.1071, 0.0914, 0.1317, 0.1490, 0.1352, 0.1244, 0.1296],
        [0.1428, 0.0999, 0.0887, 0.1261, 0.1285, 0.1187, 0.1441, 0.1512],
        [0.1126, 0.1523, 0.1265, 0.1115, 0.1178, 0.1359, 0.1212, 0.1222],
        [0.1199, 0.1534, 0.1399, 0.1138, 0.1209, 0.1241, 0.1163, 0.1117],
        [0.1352, 0.1060, 0.0947, 0.1416, 0.1344, 0.1281, 0.1157, 0.1443],
        [0.1427, 0.1026, 0.0921, 0.1374, 0.1289, 0.1203, 0.1469, 0.1292],
        [0.1519, 0.0996, 0.0915, 0.1330, 0.1245, 0.1341, 0.1342, 0.1312]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 22 [   0/390]  Loss: 0.4111 (0.411)  Acc@1: 81.2500 (81.2500)  Acc@5: 100.0000 (100.0000)LR: 1.525e-02
Train: 22 [  50/390]  Loss: 0.3331 (0.328)  Acc@1: 90.6250 (88.4191)  Acc@5: 100.0000 (99.7549)LR: 1.525e-02
Train: 22 [ 100/390]  Loss: 0.3579 (0.327)  Acc@1: 84.3750 (88.5520)  Acc@5: 98.4375 (99.6751)LR: 1.525e-02
Train: 22 [ 150/390]  Loss: 0.3099 (0.323)  Acc@1: 92.1875 (88.7831)  Acc@5: 100.0000 (99.6689)LR: 1.525e-02
Train: 22 [ 200/390]  Loss: 0.3633 (0.325)  Acc@1: 87.5000 (88.6738)  Acc@5: 98.4375 (99.6657)LR: 1.525e-02
Train: 22 [ 250/390]  Loss: 0.2857 (0.323)  Acc@1: 89.0625 (88.7450)  Acc@5: 100.0000 (99.6763)LR: 1.525e-02
Train: 22 [ 300/390]  Loss: 0.2105 (0.321)  Acc@1: 93.7500 (88.7770)  Acc@5: 100.0000 (99.6782)LR: 1.525e-02
Train: 22 [ 350/390]  Loss: 0.3260 (0.322)  Acc@1: 87.5000 (88.7776)  Acc@5: 100.0000 (99.6661)LR: 1.525e-02
Train: 22 [ 390/390]  Loss: 0.4425 (0.326)  Acc@1: 85.0000 (88.6560)  Acc@5: 100.0000 (99.6640)LR: 1.525e-02
train_acc 88.656000
Valid: 22 [   0/390]  Loss: 0.3288 (0.329)  Acc@1: 87.5000 (87.5000)  Acc@5: 100.0000 (100.0000)
Valid: 22 [  50/390]  Loss: 0.4039 (0.470)  Acc@1: 87.5000 (84.0993)  Acc@5: 98.4375 (99.1728)
Valid: 22 [ 100/390]  Loss: 0.3411 (0.449)  Acc@1: 89.0625 (84.6689)  Acc@5: 100.0000 (99.3812)
Valid: 22 [ 150/390]  Loss: 0.3282 (0.452)  Acc@1: 90.6250 (84.6440)  Acc@5: 100.0000 (99.3377)
Valid: 22 [ 200/390]  Loss: 0.4073 (0.453)  Acc@1: 89.0625 (84.8881)  Acc@5: 100.0000 (99.3004)
Valid: 22 [ 250/390]  Loss: 0.2426 (0.453)  Acc@1: 93.7500 (85.1718)  Acc@5: 100.0000 (99.2966)
Valid: 22 [ 300/390]  Loss: 0.5978 (0.450)  Acc@1: 78.1250 (85.2938)  Acc@5: 100.0000 (99.2836)
Valid: 22 [ 350/390]  Loss: 0.4537 (0.452)  Acc@1: 85.9375 (85.2386)  Acc@5: 100.0000 (99.3056)
Valid: 22 [ 390/390]  Loss: 0.3979 (0.452)  Acc@1: 85.0000 (85.2280)  Acc@5: 97.5000 (99.3080)
valid_acc 85.228000
epoch = 22   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 3), ('sep_conv_3x3', 1), ('dil_conv_5x5', 4)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('dil_conv_5x5', 3), ('max_pool_3x3', 1), ('max_pool_3x3', 0)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1367, 0.0776, 0.0598, 0.0896, 0.1903, 0.1664, 0.1491, 0.1305],
        [0.1735, 0.0648, 0.0571, 0.0794, 0.1820, 0.1507, 0.1489, 0.1436],
        [0.1783, 0.0891, 0.0629, 0.0968, 0.1394, 0.1431, 0.1515, 0.1389],
        [0.1841, 0.0695, 0.0603, 0.0839, 0.1804, 0.1549, 0.1297, 0.1372],
        [0.2205, 0.0650, 0.0519, 0.0927, 0.1401, 0.1438, 0.1469, 0.1391],
        [0.2021, 0.0829, 0.0648, 0.0940, 0.1471, 0.1492, 0.1274, 0.1325],
        [0.1876, 0.0678, 0.0621, 0.0840, 0.1605, 0.1963, 0.1187, 0.1230],
        [0.2327, 0.0652, 0.0539, 0.0928, 0.1428, 0.1371, 0.1378, 0.1378],
        [0.2512, 0.0540, 0.0484, 0.0685, 0.1498, 0.1341, 0.1494, 0.1446],
        [0.2458, 0.0808, 0.0630, 0.0903, 0.1447, 0.1258, 0.1262, 0.1234],
        [0.2225, 0.0643, 0.0583, 0.0814, 0.1840, 0.1155, 0.1347, 0.1393],
        [0.2906, 0.0616, 0.0520, 0.0920, 0.1224, 0.1185, 0.1309, 0.1319],
        [0.3353, 0.0507, 0.0462, 0.0654, 0.1234, 0.1159, 0.1273, 0.1358],
        [0.3256, 0.0471, 0.0422, 0.0538, 0.1299, 0.1251, 0.1208, 0.1554]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1047, 0.1595, 0.1265, 0.1097, 0.1343, 0.1271, 0.1199, 0.1183],
        [0.1386, 0.1340, 0.1196, 0.1207, 0.1127, 0.1224, 0.1228, 0.1293],
        [0.1066, 0.1523, 0.1262, 0.1139, 0.1141, 0.1398, 0.1338, 0.1134],
        [0.1278, 0.1429, 0.1296, 0.1221, 0.1069, 0.1260, 0.1444, 0.1003],
        [0.1354, 0.1084, 0.0901, 0.1275, 0.1254, 0.1251, 0.1383, 0.1498],
        [0.1063, 0.1543, 0.1216, 0.1369, 0.1188, 0.1298, 0.1198, 0.1126],
        [0.1167, 0.1433, 0.1302, 0.1296, 0.1132, 0.1155, 0.1223, 0.1291],
        [0.1313, 0.1068, 0.0921, 0.1336, 0.1496, 0.1333, 0.1251, 0.1283],
        [0.1436, 0.0988, 0.0890, 0.1274, 0.1268, 0.1188, 0.1446, 0.1511],
        [0.1127, 0.1520, 0.1275, 0.1119, 0.1175, 0.1361, 0.1204, 0.1219],
        [0.1187, 0.1548, 0.1414, 0.1135, 0.1203, 0.1232, 0.1169, 0.1111],
        [0.1374, 0.1052, 0.0957, 0.1458, 0.1327, 0.1257, 0.1143, 0.1433],
        [0.1439, 0.1011, 0.0924, 0.1394, 0.1279, 0.1192, 0.1475, 0.1288],
        [0.1535, 0.0981, 0.0913, 0.1348, 0.1241, 0.1337, 0.1334, 0.1312]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 23 [   0/390]  Loss: 0.5096 (0.510)  Acc@1: 81.2500 (81.2500)  Acc@5: 96.8750 (96.8750)LR: 1.450e-02
Train: 23 [  50/390]  Loss: 0.1975 (0.307)  Acc@1: 95.3125 (89.0319)  Acc@5: 100.0000 (99.6324)LR: 1.450e-02
Train: 23 [ 100/390]  Loss: 0.3052 (0.284)  Acc@1: 87.5000 (90.2692)  Acc@5: 100.0000 (99.6751)LR: 1.450e-02
Train: 23 [ 150/390]  Loss: 0.3937 (0.294)  Acc@1: 87.5000 (89.8800)  Acc@5: 98.4375 (99.6482)LR: 1.450e-02
Train: 23 [ 200/390]  Loss: 0.2338 (0.304)  Acc@1: 92.1875 (89.4823)  Acc@5: 100.0000 (99.6657)LR: 1.450e-02
Train: 23 [ 250/390]  Loss: 0.3773 (0.305)  Acc@1: 82.8125 (89.4111)  Acc@5: 100.0000 (99.6763)LR: 1.450e-02
Train: 23 [ 300/390]  Loss: 0.3028 (0.308)  Acc@1: 89.0625 (89.2909)  Acc@5: 100.0000 (99.6833)LR: 1.450e-02
Train: 23 [ 350/390]  Loss: 0.3193 (0.309)  Acc@1: 89.0625 (89.2050)  Acc@5: 100.0000 (99.6884)LR: 1.450e-02
Train: 23 [ 390/390]  Loss: 0.3235 (0.312)  Acc@1: 87.5000 (89.0520)  Acc@5: 100.0000 (99.6840)LR: 1.450e-02
train_acc 89.052000
Valid: 23 [   0/390]  Loss: 0.6458 (0.646)  Acc@1: 82.8125 (82.8125)  Acc@5: 96.8750 (96.8750)
Valid: 23 [  50/390]  Loss: 0.2799 (0.456)  Acc@1: 90.6250 (84.8039)  Acc@5: 100.0000 (99.2647)
Valid: 23 [ 100/390]  Loss: 0.5072 (0.446)  Acc@1: 81.2500 (85.1949)  Acc@5: 100.0000 (99.3193)
Valid: 23 [ 150/390]  Loss: 0.4859 (0.455)  Acc@1: 89.0625 (85.1511)  Acc@5: 96.8750 (99.3067)
Valid: 23 [ 200/390]  Loss: 0.6389 (0.457)  Acc@1: 78.1250 (85.0902)  Acc@5: 98.4375 (99.3004)
Valid: 23 [ 250/390]  Loss: 0.3195 (0.461)  Acc@1: 85.9375 (84.9726)  Acc@5: 100.0000 (99.2966)
Valid: 23 [ 300/390]  Loss: 0.5985 (0.463)  Acc@1: 81.2500 (84.8318)  Acc@5: 100.0000 (99.2784)
Valid: 23 [ 350/390]  Loss: 0.4503 (0.463)  Acc@1: 82.8125 (84.7623)  Acc@5: 100.0000 (99.3056)
Valid: 23 [ 390/390]  Loss: 0.3531 (0.465)  Acc@1: 85.0000 (84.7000)  Acc@5: 100.0000 (99.3320)
valid_acc 84.700000
epoch = 23   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_5x5', 0), ('sep_conv_3x3', 1), ('dil_conv_5x5', 4)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('dil_conv_5x5', 3), ('max_pool_3x3', 1), ('max_pool_3x3', 0)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1365, 0.0763, 0.0581, 0.0884, 0.1930, 0.1681, 0.1493, 0.1303],
        [0.1776, 0.0626, 0.0552, 0.0779, 0.1819, 0.1499, 0.1499, 0.1450],
        [0.1827, 0.0884, 0.0615, 0.0965, 0.1391, 0.1417, 0.1519, 0.1383],
        [0.1884, 0.0673, 0.0585, 0.0825, 0.1821, 0.1542, 0.1285, 0.1386],
        [0.2285, 0.0628, 0.0500, 0.0912, 0.1386, 0.1443, 0.1458, 0.1388],
        [0.2054, 0.0822, 0.0637, 0.0936, 0.1466, 0.1493, 0.1268, 0.1323],
        [0.1932, 0.0655, 0.0603, 0.0828, 0.1601, 0.1971, 0.1182, 0.1229],
        [0.2399, 0.0631, 0.0522, 0.0917, 0.1424, 0.1359, 0.1374, 0.1374],
        [0.2611, 0.0520, 0.0468, 0.0669, 0.1485, 0.1327, 0.1471, 0.1448],
        [0.2558, 0.0792, 0.0611, 0.0889, 0.1429, 0.1232, 0.1253, 0.1235],
        [0.2325, 0.0622, 0.0564, 0.0800, 0.1849, 0.1139, 0.1329, 0.1373],
        [0.3028, 0.0593, 0.0500, 0.0901, 0.1207, 0.1163, 0.1300, 0.1308],
        [0.3522, 0.0486, 0.0442, 0.0631, 0.1199, 0.1126, 0.1240, 0.1355],
        [0.3429, 0.0452, 0.0405, 0.0521, 0.1253, 0.1222, 0.1185, 0.1531]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1039, 0.1612, 0.1287, 0.1090, 0.1346, 0.1266, 0.1189, 0.1172],
        [0.1399, 0.1345, 0.1205, 0.1202, 0.1130, 0.1223, 0.1221, 0.1274],
        [0.1056, 0.1528, 0.1272, 0.1131, 0.1127, 0.1414, 0.1348, 0.1123],
        [0.1279, 0.1442, 0.1314, 0.1218, 0.1067, 0.1256, 0.1434, 0.0991],
        [0.1361, 0.1069, 0.0893, 0.1278, 0.1259, 0.1246, 0.1380, 0.1514],
        [0.1049, 0.1547, 0.1226, 0.1383, 0.1178, 0.1298, 0.1205, 0.1115],
        [0.1165, 0.1434, 0.1313, 0.1305, 0.1135, 0.1140, 0.1225, 0.1284],
        [0.1325, 0.1054, 0.0915, 0.1340, 0.1489, 0.1336, 0.1260, 0.1281],
        [0.1442, 0.0975, 0.0885, 0.1276, 0.1268, 0.1186, 0.1454, 0.1515],
        [0.1117, 0.1519, 0.1280, 0.1127, 0.1178, 0.1364, 0.1203, 0.1211],
        [0.1186, 0.1552, 0.1420, 0.1131, 0.1209, 0.1231, 0.1159, 0.1112],
        [0.1392, 0.1029, 0.0939, 0.1456, 0.1323, 0.1262, 0.1149, 0.1448],
        [0.1453, 0.0988, 0.0907, 0.1388, 0.1268, 0.1198, 0.1501, 0.1297],
        [0.1544, 0.0958, 0.0895, 0.1340, 0.1247, 0.1347, 0.1338, 0.1331]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 24 [   0/390]  Loss: 0.1876 (0.188)  Acc@1: 93.7500 (93.7500)  Acc@5: 100.0000 (100.0000)LR: 1.375e-02
Train: 24 [  50/390]  Loss: 0.2722 (0.277)  Acc@1: 90.6250 (89.8284)  Acc@5: 100.0000 (99.6936)LR: 1.375e-02
Train: 24 [ 100/390]  Loss: 0.3473 (0.282)  Acc@1: 89.0625 (89.8051)  Acc@5: 98.4375 (99.7061)LR: 1.375e-02
Train: 24 [ 150/390]  Loss: 0.1757 (0.277)  Acc@1: 92.1875 (90.0973)  Acc@5: 100.0000 (99.7413)LR: 1.375e-02
Train: 24 [ 200/390]  Loss: 0.2969 (0.284)  Acc@1: 87.5000 (89.8321)  Acc@5: 100.0000 (99.7590)LR: 1.375e-02
Train: 24 [ 250/390]  Loss: 0.3692 (0.287)  Acc@1: 84.3750 (89.8095)  Acc@5: 100.0000 (99.7385)LR: 1.375e-02
Train: 24 [ 300/390]  Loss: 0.3893 (0.288)  Acc@1: 84.3750 (89.7841)  Acc@5: 100.0000 (99.7508)LR: 1.375e-02
Train: 24 [ 350/390]  Loss: 0.3643 (0.294)  Acc@1: 89.0625 (89.6234)  Acc@5: 98.4375 (99.7418)LR: 1.375e-02
Train: 24 [ 390/390]  Loss: 0.5656 (0.300)  Acc@1: 80.0000 (89.4160)  Acc@5: 100.0000 (99.7440)LR: 1.375e-02
train_acc 89.416000
Valid: 24 [   0/390]  Loss: 0.4082 (0.408)  Acc@1: 84.3750 (84.3750)  Acc@5: 98.4375 (98.4375)
Valid: 24 [  50/390]  Loss: 0.5523 (0.415)  Acc@1: 84.3750 (86.0907)  Acc@5: 98.4375 (99.3566)
Valid: 24 [ 100/390]  Loss: 0.3102 (0.418)  Acc@1: 93.7500 (86.2624)  Acc@5: 100.0000 (99.2729)
Valid: 24 [ 150/390]  Loss: 0.3997 (0.418)  Acc@1: 85.9375 (86.3411)  Acc@5: 100.0000 (99.2343)
Valid: 24 [ 200/390]  Loss: 0.4850 (0.422)  Acc@1: 89.0625 (86.2174)  Acc@5: 100.0000 (99.2771)
Valid: 24 [ 250/390]  Loss: 0.4074 (0.425)  Acc@1: 87.5000 (86.0371)  Acc@5: 98.4375 (99.2903)
Valid: 24 [ 300/390]  Loss: 0.4562 (0.430)  Acc@1: 87.5000 (85.8025)  Acc@5: 100.0000 (99.2733)
Valid: 24 [ 350/390]  Loss: 0.4870 (0.430)  Acc@1: 81.2500 (85.7327)  Acc@5: 98.4375 (99.2655)
Valid: 24 [ 390/390]  Loss: 0.4492 (0.432)  Acc@1: 85.0000 (85.7160)  Acc@5: 100.0000 (99.2680)
valid_acc 85.716000
epoch = 24   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_5x5', 0), ('sep_conv_3x3', 1), ('dil_conv_5x5', 4)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('dil_conv_5x5', 3), ('max_pool_3x3', 1), ('dil_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1368, 0.0746, 0.0572, 0.0888, 0.1960, 0.1674, 0.1497, 0.1295],
        [0.1818, 0.0603, 0.0533, 0.0759, 0.1849, 0.1496, 0.1495, 0.1447],
        [0.1850, 0.0872, 0.0609, 0.0972, 0.1390, 0.1417, 0.1518, 0.1373],
        [0.1931, 0.0654, 0.0572, 0.0812, 0.1813, 0.1557, 0.1281, 0.1379],
        [0.2363, 0.0610, 0.0485, 0.0901, 0.1371, 0.1436, 0.1452, 0.1382],
        [0.2101, 0.0807, 0.0631, 0.0942, 0.1463, 0.1480, 0.1272, 0.1304],
        [0.1987, 0.0635, 0.0589, 0.0817, 0.1593, 0.1990, 0.1178, 0.1212],
        [0.2479, 0.0613, 0.0505, 0.0905, 0.1418, 0.1344, 0.1361, 0.1374],
        [0.2701, 0.0501, 0.0453, 0.0655, 0.1461, 0.1321, 0.1460, 0.1448],
        [0.2641, 0.0775, 0.0600, 0.0888, 0.1426, 0.1213, 0.1239, 0.1218],
        [0.2415, 0.0608, 0.0550, 0.0789, 0.1834, 0.1120, 0.1324, 0.1360],
        [0.3151, 0.0574, 0.0482, 0.0886, 0.1181, 0.1128, 0.1298, 0.1299],
        [0.3677, 0.0466, 0.0427, 0.0614, 0.1163, 0.1107, 0.1216, 0.1331],
        [0.3595, 0.0432, 0.0388, 0.0503, 0.1218, 0.1187, 0.1165, 0.1511]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1036, 0.1605, 0.1280, 0.1097, 0.1352, 0.1265, 0.1200, 0.1165],
        [0.1395, 0.1350, 0.1208, 0.1199, 0.1128, 0.1229, 0.1214, 0.1277],
        [0.1045, 0.1522, 0.1272, 0.1135, 0.1128, 0.1427, 0.1345, 0.1127],
        [0.1284, 0.1450, 0.1319, 0.1206, 0.1064, 0.1262, 0.1434, 0.0980],
        [0.1365, 0.1057, 0.0887, 0.1289, 0.1265, 0.1233, 0.1385, 0.1517],
        [0.1042, 0.1546, 0.1229, 0.1383, 0.1192, 0.1278, 0.1216, 0.1114],
        [0.1160, 0.1440, 0.1321, 0.1306, 0.1136, 0.1123, 0.1225, 0.1290],
        [0.1335, 0.1046, 0.0918, 0.1364, 0.1488, 0.1319, 0.1261, 0.1270],
        [0.1450, 0.0958, 0.0883, 0.1286, 0.1260, 0.1177, 0.1456, 0.1531],
        [0.1113, 0.1513, 0.1275, 0.1131, 0.1192, 0.1357, 0.1210, 0.1208],
        [0.1187, 0.1569, 0.1429, 0.1125, 0.1203, 0.1231, 0.1154, 0.1103],
        [0.1400, 0.1019, 0.0936, 0.1479, 0.1323, 0.1253, 0.1146, 0.1443],
        [0.1448, 0.0971, 0.0902, 0.1395, 0.1279, 0.1193, 0.1521, 0.1292],
        [0.1557, 0.0939, 0.0886, 0.1344, 0.1249, 0.1342, 0.1346, 0.1337]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 25 [   0/390]  Loss: 0.3053 (0.305)  Acc@1: 92.1875 (92.1875)  Acc@5: 100.0000 (100.0000)LR: 1.300e-02
Train: 25 [  50/390]  Loss: 0.09356 (0.280)  Acc@1: 95.3125 (90.1654)  Acc@5: 100.0000 (99.8468)LR: 1.300e-02
Train: 25 [ 100/390]  Loss: 0.3299 (0.271)  Acc@1: 89.0625 (90.7952)  Acc@5: 100.0000 (99.7370)LR: 1.300e-02
Train: 25 [ 150/390]  Loss: 0.3939 (0.289)  Acc@1: 82.8125 (90.2214)  Acc@5: 100.0000 (99.7517)LR: 1.300e-02
Train: 25 [ 200/390]  Loss: 0.1422 (0.286)  Acc@1: 95.3125 (90.3374)  Acc@5: 100.0000 (99.7512)LR: 1.300e-02
Train: 25 [ 250/390]  Loss: 0.1725 (0.288)  Acc@1: 93.7500 (90.2515)  Acc@5: 100.0000 (99.7510)LR: 1.300e-02
Train: 25 [ 300/390]  Loss: 0.1681 (0.289)  Acc@1: 95.3125 (90.0540)  Acc@5: 100.0000 (99.7560)LR: 1.300e-02
Train: 25 [ 350/390]  Loss: 0.3616 (0.290)  Acc@1: 85.9375 (90.0686)  Acc@5: 100.0000 (99.7596)LR: 1.300e-02
Train: 25 [ 390/390]  Loss: 0.3306 (0.290)  Acc@1: 85.0000 (90.0280)  Acc@5: 100.0000 (99.7720)LR: 1.300e-02
train_acc 90.028000
Valid: 25 [   0/390]  Loss: 0.3704 (0.370)  Acc@1: 84.3750 (84.3750)  Acc@5: 100.0000 (100.0000)
Valid: 25 [  50/390]  Loss: 0.3871 (0.442)  Acc@1: 89.0625 (85.7843)  Acc@5: 100.0000 (99.4485)
Valid: 25 [ 100/390]  Loss: 0.6402 (0.425)  Acc@1: 81.2500 (86.0613)  Acc@5: 100.0000 (99.4121)
Valid: 25 [ 150/390]  Loss: 0.2077 (0.416)  Acc@1: 95.3125 (86.4445)  Acc@5: 98.4375 (99.4412)
Valid: 25 [ 200/390]  Loss: 0.5834 (0.427)  Acc@1: 84.3750 (86.1396)  Acc@5: 98.4375 (99.3781)
Valid: 25 [ 250/390]  Loss: 0.6809 (0.429)  Acc@1: 79.6875 (86.0060)  Acc@5: 96.8750 (99.3899)
Valid: 25 [ 300/390]  Loss: 0.3715 (0.434)  Acc@1: 85.9375 (85.8493)  Acc@5: 100.0000 (99.3459)
Valid: 25 [ 350/390]  Loss: 0.3250 (0.432)  Acc@1: 92.1875 (85.8885)  Acc@5: 100.0000 (99.3812)
Valid: 25 [ 390/390]  Loss: 0.3425 (0.430)  Acc@1: 95.0000 (85.9880)  Acc@5: 97.5000 (99.3840)
valid_acc 85.988000
epoch = 25   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('dil_conv_5x5', 4)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('max_pool_3x3', 0), ('dil_conv_5x5', 3), ('max_pool_3x3', 1), ('dil_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1367, 0.0732, 0.0562, 0.0886, 0.2003, 0.1674, 0.1493, 0.1282],
        [0.1862, 0.0582, 0.0515, 0.0743, 0.1867, 0.1482, 0.1503, 0.1446],
        [0.1875, 0.0859, 0.0602, 0.0980, 0.1370, 0.1421, 0.1526, 0.1368],
        [0.1972, 0.0636, 0.0557, 0.0803, 0.1818, 0.1568, 0.1276, 0.1370],
        [0.2406, 0.0589, 0.0471, 0.0884, 0.1360, 0.1444, 0.1464, 0.1382],
        [0.2155, 0.0794, 0.0624, 0.0949, 0.1464, 0.1463, 0.1261, 0.1291],
        [0.2046, 0.0612, 0.0570, 0.0800, 0.1588, 0.2002, 0.1171, 0.1209],
        [0.2562, 0.0587, 0.0488, 0.0888, 0.1411, 0.1336, 0.1360, 0.1369],
        [0.2809, 0.0483, 0.0441, 0.0643, 0.1442, 0.1305, 0.1442, 0.1436],
        [0.2723, 0.0763, 0.0592, 0.0897, 0.1413, 0.1194, 0.1226, 0.1193],
        [0.2520, 0.0591, 0.0536, 0.0782, 0.1824, 0.1110, 0.1306, 0.1332],
        [0.3265, 0.0554, 0.0469, 0.0876, 0.1165, 0.1095, 0.1278, 0.1298],
        [0.3827, 0.0449, 0.0415, 0.0605, 0.1121, 0.1073, 0.1198, 0.1310],
        [0.3772, 0.0410, 0.0372, 0.0486, 0.1180, 0.1142, 0.1152, 0.1487]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1032, 0.1624, 0.1298, 0.1088, 0.1341, 0.1254, 0.1216, 0.1147],
        [0.1393, 0.1355, 0.1215, 0.1197, 0.1125, 0.1220, 0.1228, 0.1268],
        [0.1039, 0.1526, 0.1278, 0.1138, 0.1118, 0.1429, 0.1352, 0.1121],
        [0.1278, 0.1447, 0.1322, 0.1214, 0.1064, 0.1260, 0.1438, 0.0978],
        [0.1379, 0.1047, 0.0877, 0.1300, 0.1263, 0.1227, 0.1378, 0.1528],
        [0.1033, 0.1559, 0.1239, 0.1396, 0.1184, 0.1269, 0.1219, 0.1101],
        [0.1160, 0.1435, 0.1325, 0.1307, 0.1132, 0.1111, 0.1231, 0.1299],
        [0.1321, 0.1036, 0.0910, 0.1368, 0.1514, 0.1303, 0.1263, 0.1285],
        [0.1456, 0.0949, 0.0885, 0.1306, 0.1243, 0.1170, 0.1459, 0.1532],
        [0.1102, 0.1512, 0.1274, 0.1127, 0.1198, 0.1367, 0.1217, 0.1204],
        [0.1183, 0.1564, 0.1426, 0.1116, 0.1210, 0.1228, 0.1169, 0.1104],
        [0.1412, 0.1006, 0.0923, 0.1492, 0.1320, 0.1251, 0.1145, 0.1452],
        [0.1461, 0.0952, 0.0892, 0.1404, 0.1286, 0.1186, 0.1526, 0.1293],
        [0.1577, 0.0920, 0.0873, 0.1351, 0.1241, 0.1332, 0.1355, 0.1351]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 26 [   0/390]  Loss: 0.09671 (0.0967)  Acc@1: 98.4375 (98.4375)  Acc@5: 100.0000 (100.0000)LR: 1.225e-02
Train: 26 [  50/390]  Loss: 0.1785 (0.279)  Acc@1: 93.7500 (90.0735)  Acc@5: 100.0000 (99.7549)LR: 1.225e-02
Train: 26 [ 100/390]  Loss: 0.3637 (0.278)  Acc@1: 85.9375 (90.1145)  Acc@5: 100.0000 (99.7525)LR: 1.225e-02
Train: 26 [ 150/390]  Loss: 0.1631 (0.277)  Acc@1: 95.3125 (90.2318)  Acc@5: 100.0000 (99.7517)LR: 1.225e-02
Train: 26 [ 200/390]  Loss: 0.4928 (0.283)  Acc@1: 81.2500 (89.8632)  Acc@5: 100.0000 (99.7746)LR: 1.225e-02
Train: 26 [ 250/390]  Loss: 0.2187 (0.283)  Acc@1: 92.1875 (89.8406)  Acc@5: 100.0000 (99.7510)LR: 1.225e-02
Train: 26 [ 300/390]  Loss: 0.1748 (0.282)  Acc@1: 95.3125 (89.9865)  Acc@5: 100.0000 (99.7508)LR: 1.225e-02
Train: 26 [ 350/390]  Loss: 0.2155 (0.284)  Acc@1: 95.3125 (89.9973)  Acc@5: 100.0000 (99.7418)LR: 1.225e-02
Train: 26 [ 390/390]  Loss: 0.1687 (0.287)  Acc@1: 95.0000 (89.8480)  Acc@5: 100.0000 (99.7360)LR: 1.225e-02
train_acc 89.848000
Valid: 26 [   0/390]  Loss: 0.4524 (0.452)  Acc@1: 85.9375 (85.9375)  Acc@5: 100.0000 (100.0000)
Valid: 26 [  50/390]  Loss: 0.5842 (0.412)  Acc@1: 82.8125 (86.1520)  Acc@5: 100.0000 (99.5098)
Valid: 26 [ 100/390]  Loss: 0.3726 (0.402)  Acc@1: 87.5000 (86.5099)  Acc@5: 98.4375 (99.5050)
Valid: 26 [ 150/390]  Loss: 0.3168 (0.413)  Acc@1: 90.6250 (86.1858)  Acc@5: 100.0000 (99.4205)
Valid: 26 [ 200/390]  Loss: 0.3562 (0.413)  Acc@1: 85.9375 (86.2873)  Acc@5: 100.0000 (99.4092)
Valid: 26 [ 250/390]  Loss: 0.3143 (0.413)  Acc@1: 84.3750 (86.2674)  Acc@5: 100.0000 (99.3962)
Valid: 26 [ 300/390]  Loss: 0.4246 (0.412)  Acc@1: 87.5000 (86.3891)  Acc@5: 98.4375 (99.3511)
Valid: 26 [ 350/390]  Loss: 0.4221 (0.410)  Acc@1: 93.7500 (86.4717)  Acc@5: 98.4375 (99.3278)
Valid: 26 [ 390/390]  Loss: 0.3899 (0.416)  Acc@1: 90.0000 (86.4280)  Acc@5: 100.0000 (99.3000)
valid_acc 86.428000
epoch = 26   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_5x5', 0), ('sep_conv_3x3', 1), ('dil_conv_5x5', 4)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('dil_conv_5x5', 3), ('max_pool_3x3', 1), ('dil_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1382, 0.0710, 0.0549, 0.0874, 0.2036, 0.1693, 0.1477, 0.1279],
        [0.1880, 0.0565, 0.0502, 0.0734, 0.1867, 0.1488, 0.1516, 0.1448],
        [0.1931, 0.0840, 0.0595, 0.0983, 0.1352, 0.1408, 0.1535, 0.1354],
        [0.2010, 0.0620, 0.0546, 0.0796, 0.1822, 0.1572, 0.1266, 0.1368],
        [0.2455, 0.0569, 0.0458, 0.0867, 0.1376, 0.1433, 0.1453, 0.1389],
        [0.2223, 0.0777, 0.0614, 0.0945, 0.1452, 0.1452, 0.1259, 0.1276],
        [0.2103, 0.0599, 0.0560, 0.0797, 0.1595, 0.1996, 0.1156, 0.1195],
        [0.2644, 0.0571, 0.0478, 0.0883, 0.1395, 0.1328, 0.1340, 0.1361],
        [0.2916, 0.0467, 0.0428, 0.0631, 0.1416, 0.1296, 0.1423, 0.1423],
        [0.2823, 0.0740, 0.0580, 0.0891, 0.1396, 0.1173, 0.1215, 0.1182],
        [0.2629, 0.0575, 0.0527, 0.0778, 0.1797, 0.1078, 0.1298, 0.1317],
        [0.3379, 0.0535, 0.0457, 0.0865, 0.1143, 0.1055, 0.1275, 0.1292],
        [0.4013, 0.0428, 0.0400, 0.0587, 0.1086, 0.1030, 0.1167, 0.1290],
        [0.3928, 0.0391, 0.0356, 0.0467, 0.1156, 0.1112, 0.1118, 0.1471]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1023, 0.1651, 0.1322, 0.1082, 0.1331, 0.1245, 0.1216, 0.1130],
        [0.1393, 0.1354, 0.1221, 0.1209, 0.1119, 0.1224, 0.1232, 0.1248],
        [0.1024, 0.1538, 0.1296, 0.1146, 0.1113, 0.1425, 0.1352, 0.1106],
        [0.1276, 0.1444, 0.1329, 0.1213, 0.1053, 0.1255, 0.1446, 0.0982],
        [0.1393, 0.1031, 0.0875, 0.1315, 0.1267, 0.1215, 0.1369, 0.1535],
        [0.1025, 0.1574, 0.1254, 0.1399, 0.1177, 0.1265, 0.1216, 0.1090],
        [0.1158, 0.1430, 0.1339, 0.1309, 0.1130, 0.1088, 0.1238, 0.1307],
        [0.1327, 0.1013, 0.0906, 0.1380, 0.1532, 0.1299, 0.1261, 0.1282],
        [0.1466, 0.0927, 0.0883, 0.1310, 0.1244, 0.1179, 0.1453, 0.1537],
        [0.1093, 0.1522, 0.1288, 0.1130, 0.1203, 0.1358, 0.1210, 0.1195],
        [0.1179, 0.1566, 0.1443, 0.1101, 0.1214, 0.1223, 0.1166, 0.1108],
        [0.1427, 0.0987, 0.0920, 0.1518, 0.1314, 0.1233, 0.1136, 0.1466],
        [0.1473, 0.0931, 0.0888, 0.1418, 0.1281, 0.1188, 0.1537, 0.1284],
        [0.1595, 0.0900, 0.0867, 0.1360, 0.1242, 0.1324, 0.1358, 0.1353]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 27 [   0/390]  Loss: 0.3307 (0.331)  Acc@1: 90.6250 (90.6250)  Acc@5: 98.4375 (98.4375)LR: 1.150e-02
Train: 27 [  50/390]  Loss: 0.2305 (0.261)  Acc@1: 92.1875 (90.8395)  Acc@5: 100.0000 (99.8162)LR: 1.150e-02
Train: 27 [ 100/390]  Loss: 0.3693 (0.263)  Acc@1: 89.0625 (90.6250)  Acc@5: 100.0000 (99.8298)LR: 1.150e-02
Train: 27 [ 150/390]  Loss: 0.5111 (0.266)  Acc@1: 84.3750 (90.7078)  Acc@5: 98.4375 (99.8448)LR: 1.150e-02
Train: 27 [ 200/390]  Loss: 0.2040 (0.261)  Acc@1: 92.1875 (90.9593)  Acc@5: 100.0000 (99.8212)LR: 1.150e-02
Train: 27 [ 250/390]  Loss: 0.3854 (0.262)  Acc@1: 85.9375 (90.8865)  Acc@5: 100.0000 (99.8008)LR: 1.150e-02
Train: 27 [ 300/390]  Loss: 0.2434 (0.261)  Acc@1: 95.3125 (90.9261)  Acc@5: 100.0000 (99.8079)LR: 1.150e-02
Train: 27 [ 350/390]  Loss: 0.3007 (0.264)  Acc@1: 89.0625 (90.8565)  Acc@5: 100.0000 (99.7908)LR: 1.150e-02
Train: 27 [ 390/390]  Loss: 0.2133 (0.266)  Acc@1: 87.5000 (90.8080)  Acc@5: 100.0000 (99.7920)LR: 1.150e-02
train_acc 90.808000
Valid: 27 [   0/390]  Loss: 0.3149 (0.315)  Acc@1: 90.6250 (90.6250)  Acc@5: 100.0000 (100.0000)
Valid: 27 [  50/390]  Loss: 0.5457 (0.433)  Acc@1: 84.3750 (85.7537)  Acc@5: 100.0000 (99.3873)
Valid: 27 [ 100/390]  Loss: 0.2405 (0.434)  Acc@1: 90.6250 (86.2314)  Acc@5: 100.0000 (99.4740)
Valid: 27 [ 150/390]  Loss: 0.2260 (0.442)  Acc@1: 92.1875 (85.8340)  Acc@5: 100.0000 (99.4930)
Valid: 27 [ 200/390]  Loss: 0.3517 (0.436)  Acc@1: 90.6250 (86.0386)  Acc@5: 98.4375 (99.5180)
Valid: 27 [ 250/390]  Loss: 0.6197 (0.436)  Acc@1: 81.2500 (85.9126)  Acc@5: 98.4375 (99.4522)
Valid: 27 [ 300/390]  Loss: 0.4994 (0.437)  Acc@1: 81.2500 (85.8700)  Acc@5: 98.4375 (99.4601)
Valid: 27 [ 350/390]  Loss: 0.5099 (0.433)  Acc@1: 84.3750 (85.9509)  Acc@5: 100.0000 (99.4881)
Valid: 27 [ 390/390]  Loss: 0.6649 (0.430)  Acc@1: 80.0000 (86.0560)  Acc@5: 97.5000 (99.4920)
valid_acc 86.056000
epoch = 27   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('dil_conv_5x5', 4)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('dil_conv_5x5', 3), ('max_pool_3x3', 1), ('dil_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1390, 0.0693, 0.0540, 0.0867, 0.2061, 0.1693, 0.1478, 0.1278],
        [0.1921, 0.0545, 0.0487, 0.0716, 0.1866, 0.1490, 0.1519, 0.1456],
        [0.1971, 0.0824, 0.0589, 0.0986, 0.1359, 0.1409, 0.1525, 0.1338],
        [0.2056, 0.0601, 0.0530, 0.0782, 0.1833, 0.1568, 0.1264, 0.1366],
        [0.2527, 0.0549, 0.0442, 0.0848, 0.1380, 0.1419, 0.1434, 0.1399],
        [0.2255, 0.0764, 0.0608, 0.0947, 0.1473, 0.1442, 0.1257, 0.1254],
        [0.2163, 0.0584, 0.0548, 0.0788, 0.1590, 0.1998, 0.1137, 0.1192],
        [0.2726, 0.0555, 0.0464, 0.0868, 0.1384, 0.1327, 0.1329, 0.1346],
        [0.3041, 0.0452, 0.0418, 0.0623, 0.1376, 0.1268, 0.1398, 0.1424],
        [0.2915, 0.0723, 0.0572, 0.0886, 0.1380, 0.1166, 0.1193, 0.1164],
        [0.2751, 0.0560, 0.0516, 0.0767, 0.1783, 0.1059, 0.1278, 0.1285],
        [0.3516, 0.0515, 0.0443, 0.0846, 0.1125, 0.1018, 0.1262, 0.1275],
        [0.4207, 0.0410, 0.0387, 0.0571, 0.1044, 0.0997, 0.1130, 0.1255],
        [0.4079, 0.0375, 0.0344, 0.0452, 0.1122, 0.1091, 0.1093, 0.1445]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1011, 0.1650, 0.1328, 0.1081, 0.1326, 0.1253, 0.1222, 0.1130],
        [0.1410, 0.1331, 0.1210, 0.1212, 0.1127, 0.1228, 0.1242, 0.1240],
        [0.1012, 0.1537, 0.1305, 0.1155, 0.1110, 0.1438, 0.1357, 0.1086],
        [0.1276, 0.1423, 0.1323, 0.1227, 0.1054, 0.1257, 0.1457, 0.0982],
        [0.1407, 0.1023, 0.0874, 0.1326, 0.1264, 0.1220, 0.1360, 0.1526],
        [0.1018, 0.1572, 0.1261, 0.1391, 0.1178, 0.1269, 0.1226, 0.1087],
        [0.1165, 0.1419, 0.1341, 0.1297, 0.1147, 0.1079, 0.1236, 0.1315],
        [0.1329, 0.1001, 0.0904, 0.1383, 0.1527, 0.1318, 0.1254, 0.1284],
        [0.1464, 0.0910, 0.0881, 0.1311, 0.1257, 0.1185, 0.1448, 0.1543],
        [0.1086, 0.1517, 0.1293, 0.1133, 0.1217, 0.1354, 0.1202, 0.1197],
        [0.1176, 0.1551, 0.1434, 0.1102, 0.1216, 0.1229, 0.1182, 0.1110],
        [0.1442, 0.0973, 0.0915, 0.1524, 0.1323, 0.1214, 0.1134, 0.1474],
        [0.1479, 0.0914, 0.0885, 0.1426, 0.1284, 0.1194, 0.1547, 0.1271],
        [0.1607, 0.0881, 0.0860, 0.1357, 0.1259, 0.1327, 0.1354, 0.1356]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 28 [   0/390]  Loss: 0.2570 (0.257)  Acc@1: 90.6250 (90.6250)  Acc@5: 100.0000 (100.0000)LR: 1.075e-02
Train: 28 [  50/390]  Loss: 0.2523 (0.289)  Acc@1: 92.1875 (90.3186)  Acc@5: 98.4375 (99.7855)LR: 1.075e-02
Train: 28 [ 100/390]  Loss: 0.3383 (0.280)  Acc@1: 85.9375 (90.1454)  Acc@5: 100.0000 (99.7215)LR: 1.075e-02
Train: 28 [ 150/390]  Loss: 0.1842 (0.270)  Acc@1: 93.7500 (90.4594)  Acc@5: 100.0000 (99.7930)LR: 1.075e-02
Train: 28 [ 200/390]  Loss: 0.2864 (0.266)  Acc@1: 90.6250 (90.5861)  Acc@5: 100.0000 (99.7901)LR: 1.075e-02
Train: 28 [ 250/390]  Loss: 0.1982 (0.264)  Acc@1: 92.1875 (90.5752)  Acc@5: 100.0000 (99.8070)LR: 1.075e-02
Train: 28 [ 300/390]  Loss: 0.1201 (0.262)  Acc@1: 93.7500 (90.6510)  Acc@5: 100.0000 (99.8079)LR: 1.075e-02
Train: 28 [ 350/390]  Loss: 0.2880 (0.261)  Acc@1: 89.0625 (90.6651)  Acc@5: 100.0000 (99.7952)LR: 1.075e-02
Train: 28 [ 390/390]  Loss: 0.5492 (0.261)  Acc@1: 82.5000 (90.6560)  Acc@5: 100.0000 (99.8040)LR: 1.075e-02
train_acc 90.656000
Valid: 28 [   0/390]  Loss: 0.2552 (0.255)  Acc@1: 92.1875 (92.1875)  Acc@5: 100.0000 (100.0000)
Valid: 28 [  50/390]  Loss: 0.1994 (0.381)  Acc@1: 89.0625 (87.7145)  Acc@5: 100.0000 (99.4485)
Valid: 28 [ 100/390]  Loss: 0.2905 (0.395)  Acc@1: 93.7500 (87.3144)  Acc@5: 100.0000 (99.4431)
Valid: 28 [ 150/390]  Loss: 0.4115 (0.392)  Acc@1: 89.0625 (87.2310)  Acc@5: 98.4375 (99.4619)
Valid: 28 [ 200/390]  Loss: 0.3000 (0.401)  Acc@1: 90.6250 (86.9636)  Acc@5: 100.0000 (99.4636)
Valid: 28 [ 250/390]  Loss: 0.2817 (0.402)  Acc@1: 92.1875 (86.9335)  Acc@5: 98.4375 (99.4460)
Valid: 28 [ 300/390]  Loss: 0.3741 (0.406)  Acc@1: 90.6250 (86.7940)  Acc@5: 100.0000 (99.4342)
Valid: 28 [ 350/390]  Loss: 0.5183 (0.404)  Acc@1: 85.9375 (86.9079)  Acc@5: 98.4375 (99.4079)
Valid: 28 [ 390/390]  Loss: 0.2690 (0.402)  Acc@1: 92.5000 (86.9440)  Acc@5: 100.0000 (99.4160)
valid_acc 86.944000
epoch = 28   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('dil_conv_5x5', 4)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('sep_conv_3x3', 2), ('dil_conv_3x3', 3), ('skip_connect', 2)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1392, 0.0678, 0.0531, 0.0865, 0.2097, 0.1688, 0.1480, 0.1270],
        [0.1960, 0.0526, 0.0472, 0.0699, 0.1884, 0.1474, 0.1525, 0.1460],
        [0.2013, 0.0813, 0.0584, 0.0994, 0.1351, 0.1400, 0.1534, 0.1311],
        [0.2122, 0.0589, 0.0519, 0.0776, 0.1823, 0.1545, 0.1260, 0.1366],
        [0.2600, 0.0532, 0.0429, 0.0833, 0.1383, 0.1405, 0.1433, 0.1385],
        [0.2301, 0.0755, 0.0603, 0.0959, 0.1477, 0.1436, 0.1232, 0.1236],
        [0.2225, 0.0567, 0.0535, 0.0781, 0.1588, 0.1997, 0.1133, 0.1174],
        [0.2838, 0.0539, 0.0451, 0.0862, 0.1362, 0.1295, 0.1315, 0.1338],
        [0.3164, 0.0433, 0.0402, 0.0604, 0.1349, 0.1246, 0.1380, 0.1422],
        [0.3006, 0.0707, 0.0564, 0.0890, 0.1364, 0.1135, 0.1179, 0.1155],
        [0.2894, 0.0544, 0.0504, 0.0758, 0.1742, 0.1030, 0.1267, 0.1261],
        [0.3677, 0.0497, 0.0428, 0.0837, 0.1096, 0.0975, 0.1238, 0.1252],
        [0.4413, 0.0390, 0.0369, 0.0549, 0.0995, 0.0966, 0.1097, 0.1221],
        [0.4265, 0.0356, 0.0328, 0.0434, 0.1083, 0.1065, 0.1061, 0.1408]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1002, 0.1665, 0.1330, 0.1068, 0.1342, 0.1252, 0.1223, 0.1118],
        [0.1414, 0.1321, 0.1207, 0.1203, 0.1136, 0.1223, 0.1241, 0.1256],
        [0.1000, 0.1543, 0.1307, 0.1165, 0.1109, 0.1431, 0.1365, 0.1080],
        [0.1280, 0.1413, 0.1329, 0.1236, 0.1042, 0.1251, 0.1469, 0.0979],
        [0.1425, 0.1011, 0.0870, 0.1344, 0.1261, 0.1220, 0.1364, 0.1504],
        [0.1002, 0.1580, 0.1264, 0.1389, 0.1167, 0.1284, 0.1231, 0.1084],
        [0.1158, 0.1412, 0.1349, 0.1283, 0.1157, 0.1074, 0.1240, 0.1327],
        [0.1324, 0.0994, 0.0905, 0.1399, 0.1544, 0.1316, 0.1242, 0.1275],
        [0.1483, 0.0896, 0.0877, 0.1321, 0.1249, 0.1186, 0.1445, 0.1543],
        [0.1070, 0.1523, 0.1301, 0.1141, 0.1226, 0.1355, 0.1201, 0.1183],
        [0.1173, 0.1541, 0.1441, 0.1091, 0.1212, 0.1231, 0.1193, 0.1118],
        [0.1456, 0.0958, 0.0912, 0.1553, 0.1323, 0.1214, 0.1123, 0.1461],
        [0.1505, 0.0892, 0.0876, 0.1440, 0.1286, 0.1192, 0.1556, 0.1252],
        [0.1622, 0.0857, 0.0849, 0.1358, 0.1271, 0.1326, 0.1358, 0.1360]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 29 [   0/390]  Loss: 0.2267 (0.227)  Acc@1: 90.6250 (90.6250)  Acc@5: 100.0000 (100.0000)LR: 1.002e-02
Train: 29 [  50/390]  Loss: 0.2890 (0.248)  Acc@1: 89.0625 (91.1765)  Acc@5: 100.0000 (99.8468)LR: 1.002e-02
Train: 29 [ 100/390]  Loss: 0.2730 (0.258)  Acc@1: 90.6250 (91.0736)  Acc@5: 100.0000 (99.8608)LR: 1.002e-02
Train: 29 [ 150/390]  Loss: 0.1238 (0.260)  Acc@1: 96.8750 (90.8837)  Acc@5: 100.0000 (99.8344)LR: 1.002e-02
Train: 29 [ 200/390]  Loss: 0.2774 (0.261)  Acc@1: 89.0625 (90.7727)  Acc@5: 100.0000 (99.8212)LR: 1.002e-02
Train: 29 [ 250/390]  Loss: 0.1537 (0.264)  Acc@1: 92.1875 (90.6188)  Acc@5: 100.0000 (99.8070)LR: 1.002e-02
Train: 29 [ 300/390]  Loss: 0.2699 (0.263)  Acc@1: 90.6250 (90.7444)  Acc@5: 100.0000 (99.8027)LR: 1.002e-02
Train: 29 [ 350/390]  Loss: 0.3652 (0.264)  Acc@1: 89.0625 (90.6829)  Acc@5: 98.4375 (99.7908)LR: 1.002e-02
Train: 29 [ 390/390]  Loss: 0.2743 (0.265)  Acc@1: 85.0000 (90.6800)  Acc@5: 100.0000 (99.7840)LR: 1.002e-02
train_acc 90.680000
Valid: 29 [   0/390]  Loss: 0.2728 (0.273)  Acc@1: 89.0625 (89.0625)  Acc@5: 100.0000 (100.0000)
Valid: 29 [  50/390]  Loss: 0.3066 (0.389)  Acc@1: 93.7500 (87.5919)  Acc@5: 100.0000 (99.5711)
Valid: 29 [ 100/390]  Loss: 0.4824 (0.393)  Acc@1: 82.8125 (86.9585)  Acc@5: 100.0000 (99.5978)
Valid: 29 [ 150/390]  Loss: 0.2724 (0.386)  Acc@1: 93.7500 (87.0964)  Acc@5: 100.0000 (99.6275)
Valid: 29 [ 200/390]  Loss: 0.4797 (0.383)  Acc@1: 81.2500 (87.1424)  Acc@5: 98.4375 (99.5880)
Valid: 29 [ 250/390]  Loss: 0.3829 (0.388)  Acc@1: 87.5000 (87.1265)  Acc@5: 100.0000 (99.5020)
Valid: 29 [ 300/390]  Loss: 0.2186 (0.392)  Acc@1: 90.6250 (87.0484)  Acc@5: 100.0000 (99.4913)
Valid: 29 [ 350/390]  Loss: 0.4146 (0.393)  Acc@1: 84.3750 (87.0860)  Acc@5: 100.0000 (99.4970)
Valid: 29 [ 390/390]  Loss: 0.1919 (0.391)  Acc@1: 95.0000 (87.1920)  Acc@5: 100.0000 (99.4760)
valid_acc 87.192000
epoch = 29   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('dil_conv_5x5', 4)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('sep_conv_3x3', 2), ('skip_connect', 2), ('dil_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1389, 0.0661, 0.0519, 0.0855, 0.2116, 0.1699, 0.1484, 0.1276],
        [0.2010, 0.0510, 0.0459, 0.0686, 0.1885, 0.1447, 0.1539, 0.1465],
        [0.2050, 0.0799, 0.0575, 0.0989, 0.1342, 0.1405, 0.1541, 0.1300],
        [0.2191, 0.0572, 0.0506, 0.0764, 0.1819, 0.1535, 0.1255, 0.1358],
        [0.2656, 0.0514, 0.0418, 0.0820, 0.1369, 0.1396, 0.1436, 0.1392],
        [0.2362, 0.0746, 0.0595, 0.0958, 0.1476, 0.1413, 0.1233, 0.1218],
        [0.2306, 0.0550, 0.0520, 0.0769, 0.1582, 0.1977, 0.1128, 0.1167],
        [0.2919, 0.0523, 0.0439, 0.0847, 0.1367, 0.1282, 0.1289, 0.1334],
        [0.3278, 0.0418, 0.0390, 0.0589, 0.1311, 0.1240, 0.1359, 0.1415],
        [0.3111, 0.0693, 0.0553, 0.0883, 0.1344, 0.1118, 0.1155, 0.1144],
        [0.3039, 0.0524, 0.0488, 0.0742, 0.1712, 0.1020, 0.1238, 0.1237],
        [0.3809, 0.0479, 0.0416, 0.0819, 0.1069, 0.0951, 0.1218, 0.1239],
        [0.4600, 0.0374, 0.0356, 0.0532, 0.0956, 0.0929, 0.1063, 0.1190],
        [0.4466, 0.0340, 0.0316, 0.0419, 0.1035, 0.1029, 0.1023, 0.1372]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.0990, 0.1670, 0.1335, 0.1063, 0.1343, 0.1251, 0.1227, 0.1121],
        [0.1428, 0.1304, 0.1196, 0.1206, 0.1152, 0.1228, 0.1240, 0.1245],
        [0.0995, 0.1546, 0.1314, 0.1163, 0.1099, 0.1444, 0.1363, 0.1076],
        [0.1285, 0.1405, 0.1328, 0.1241, 0.1033, 0.1245, 0.1479, 0.0984],
        [0.1424, 0.0997, 0.0862, 0.1342, 0.1268, 0.1230, 0.1358, 0.1518],
        [0.0992, 0.1590, 0.1276, 0.1385, 0.1154, 0.1290, 0.1235, 0.1078],
        [0.1160, 0.1406, 0.1347, 0.1288, 0.1150, 0.1066, 0.1241, 0.1343],
        [0.1327, 0.0987, 0.0907, 0.1411, 0.1538, 0.1318, 0.1237, 0.1276],
        [0.1479, 0.0888, 0.0884, 0.1335, 0.1252, 0.1182, 0.1449, 0.1529],
        [0.1061, 0.1528, 0.1309, 0.1142, 0.1225, 0.1357, 0.1198, 0.1181],
        [0.1181, 0.1531, 0.1439, 0.1088, 0.1216, 0.1239, 0.1195, 0.1110],
        [0.1462, 0.0946, 0.0908, 0.1564, 0.1322, 0.1207, 0.1124, 0.1467],
        [0.1526, 0.0879, 0.0878, 0.1457, 0.1277, 0.1182, 0.1559, 0.1243],
        [0.1624, 0.0843, 0.0845, 0.1361, 0.1263, 0.1336, 0.1361, 0.1367]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 30 [   0/390]  Loss: 0.2548 (0.255)  Acc@1: 92.1875 (92.1875)  Acc@5: 100.0000 (100.0000)LR: 9.292e-03
Train: 30 [  50/390]  Loss: 0.2917 (0.235)  Acc@1: 93.7500 (91.6973)  Acc@5: 100.0000 (99.7855)LR: 9.292e-03
Train: 30 [ 100/390]  Loss: 0.1866 (0.232)  Acc@1: 95.3125 (91.8472)  Acc@5: 100.0000 (99.8608)LR: 9.292e-03
Train: 30 [ 150/390]  Loss: 0.2901 (0.240)  Acc@1: 93.7500 (91.5873)  Acc@5: 98.4375 (99.8758)LR: 9.292e-03
Train: 30 [ 200/390]  Loss: 0.4393 (0.245)  Acc@1: 79.6875 (91.2313)  Acc@5: 100.0000 (99.8834)LR: 9.292e-03
Train: 30 [ 250/390]  Loss: 0.2176 (0.247)  Acc@1: 90.6250 (91.2413)  Acc@5: 100.0000 (99.8568)LR: 9.292e-03
Train: 30 [ 300/390]  Loss: 0.3419 (0.245)  Acc@1: 84.3750 (91.3050)  Acc@5: 98.4375 (99.8495)LR: 9.292e-03
Train: 30 [ 350/390]  Loss: 0.1261 (0.247)  Acc@1: 96.8750 (91.2126)  Acc@5: 100.0000 (99.8442)LR: 9.292e-03
Train: 30 [ 390/390]  Loss: 0.2616 (0.249)  Acc@1: 95.0000 (91.1520)  Acc@5: 100.0000 (99.8400)LR: 9.292e-03
train_acc 91.152000
Valid: 30 [   0/390]  Loss: 0.5348 (0.535)  Acc@1: 84.3750 (84.3750)  Acc@5: 98.4375 (98.4375)
Valid: 30 [  50/390]  Loss: 0.4723 (0.418)  Acc@1: 87.5000 (86.2132)  Acc@5: 100.0000 (99.3566)
Valid: 30 [ 100/390]  Loss: 0.5603 (0.398)  Acc@1: 76.5625 (87.0668)  Acc@5: 100.0000 (99.4431)
Valid: 30 [ 150/390]  Loss: 0.3007 (0.404)  Acc@1: 90.6250 (86.8998)  Acc@5: 98.4375 (99.4205)
Valid: 30 [ 200/390]  Loss: 0.5745 (0.397)  Acc@1: 79.6875 (86.9014)  Acc@5: 98.4375 (99.4636)
Valid: 30 [ 250/390]  Loss: 0.4349 (0.406)  Acc@1: 89.0625 (86.7779)  Acc@5: 100.0000 (99.4335)
Valid: 30 [ 300/390]  Loss: 0.3431 (0.401)  Acc@1: 87.5000 (86.8304)  Acc@5: 100.0000 (99.4498)
Valid: 30 [ 350/390]  Loss: 0.4828 (0.401)  Acc@1: 92.1875 (86.8100)  Acc@5: 96.8750 (99.4525)
Valid: 30 [ 390/390]  Loss: 0.1713 (0.402)  Acc@1: 92.5000 (86.8600)  Acc@5: 100.0000 (99.4400)
valid_acc 86.860000
epoch = 30   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('dil_conv_5x5', 4)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('sep_conv_3x3', 2), ('skip_connect', 2), ('dil_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1387, 0.0648, 0.0513, 0.0854, 0.2147, 0.1703, 0.1482, 0.1267],
        [0.2063, 0.0494, 0.0449, 0.0676, 0.1882, 0.1433, 0.1540, 0.1463],
        [0.2091, 0.0783, 0.0568, 0.0989, 0.1328, 0.1399, 0.1548, 0.1294],
        [0.2260, 0.0560, 0.0498, 0.0761, 0.1806, 0.1519, 0.1248, 0.1348],
        [0.2718, 0.0497, 0.0408, 0.0805, 0.1377, 0.1383, 0.1426, 0.1386],
        [0.2407, 0.0736, 0.0591, 0.0964, 0.1484, 0.1396, 0.1224, 0.1198],
        [0.2388, 0.0536, 0.0511, 0.0763, 0.1569, 0.1959, 0.1114, 0.1160],
        [0.3005, 0.0508, 0.0428, 0.0833, 0.1368, 0.1268, 0.1272, 0.1318],
        [0.3407, 0.0404, 0.0380, 0.0575, 0.1285, 0.1208, 0.1345, 0.1396],
        [0.3205, 0.0679, 0.0547, 0.0885, 0.1331, 0.1101, 0.1120, 0.1132],
        [0.3172, 0.0508, 0.0475, 0.0729, 0.1689, 0.1002, 0.1219, 0.1206],
        [0.3973, 0.0459, 0.0404, 0.0804, 0.1031, 0.0927, 0.1191, 0.1212],
        [0.4790, 0.0356, 0.0344, 0.0516, 0.0923, 0.0894, 0.1024, 0.1153],
        [0.4641, 0.0323, 0.0303, 0.0402, 0.1001, 0.1006, 0.0986, 0.1337]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.0978, 0.1676, 0.1349, 0.1062, 0.1339, 0.1257, 0.1230, 0.1110],
        [0.1434, 0.1297, 0.1199, 0.1214, 0.1147, 0.1225, 0.1242, 0.1242],
        [0.0997, 0.1552, 0.1329, 0.1165, 0.1099, 0.1435, 0.1356, 0.1066],
        [0.1284, 0.1404, 0.1338, 0.1258, 0.1026, 0.1227, 0.1486, 0.0977],
        [0.1421, 0.0981, 0.0860, 0.1347, 0.1260, 0.1240, 0.1357, 0.1534],
        [0.0984, 0.1597, 0.1292, 0.1395, 0.1140, 0.1292, 0.1233, 0.1067],
        [0.1153, 0.1407, 0.1354, 0.1297, 0.1147, 0.1057, 0.1233, 0.1352],
        [0.1329, 0.0980, 0.0912, 0.1434, 0.1545, 0.1296, 0.1235, 0.1268],
        [0.1499, 0.0876, 0.0888, 0.1354, 0.1257, 0.1171, 0.1436, 0.1519],
        [0.1058, 0.1516, 0.1309, 0.1156, 0.1234, 0.1353, 0.1195, 0.1179],
        [0.1172, 0.1538, 0.1450, 0.1078, 0.1215, 0.1232, 0.1204, 0.1111],
        [0.1464, 0.0931, 0.0903, 0.1573, 0.1317, 0.1219, 0.1122, 0.1471],
        [0.1537, 0.0862, 0.0872, 0.1458, 0.1282, 0.1177, 0.1572, 0.1242],
        [0.1649, 0.0823, 0.0839, 0.1364, 0.1274, 0.1340, 0.1346, 0.1365]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 31 [   0/390]  Loss: 0.1269 (0.127)  Acc@1: 95.3125 (95.3125)  Acc@5: 100.0000 (100.0000)LR: 8.583e-03
Train: 31 [  50/390]  Loss: 0.3426 (0.222)  Acc@1: 85.9375 (92.0650)  Acc@5: 100.0000 (99.8775)LR: 8.583e-03
Train: 31 [ 100/390]  Loss: 0.1570 (0.230)  Acc@1: 95.3125 (91.9554)  Acc@5: 100.0000 (99.8762)LR: 8.583e-03
Train: 31 [ 150/390]  Loss: 0.1448 (0.227)  Acc@1: 95.3125 (92.0012)  Acc@5: 98.4375 (99.8448)LR: 8.583e-03
Train: 31 [ 200/390]  Loss: 0.3348 (0.225)  Acc@1: 84.3750 (92.1253)  Acc@5: 100.0000 (99.8368)LR: 8.583e-03
Train: 31 [ 250/390]  Loss: 0.3624 (0.229)  Acc@1: 85.9375 (92.0754)  Acc@5: 100.0000 (99.8008)LR: 8.583e-03
Train: 31 [ 300/390]  Loss: 0.5097 (0.236)  Acc@1: 82.8125 (91.7670)  Acc@5: 100.0000 (99.8287)LR: 8.583e-03
Train: 31 [ 350/390]  Loss: 0.1967 (0.235)  Acc@1: 93.7500 (91.8002)  Acc@5: 100.0000 (99.8130)LR: 8.583e-03
Train: 31 [ 390/390]  Loss: 0.3204 (0.238)  Acc@1: 92.5000 (91.7200)  Acc@5: 97.5000 (99.8160)LR: 8.583e-03
train_acc 91.720000
Valid: 31 [   0/390]  Loss: 0.2575 (0.258)  Acc@1: 92.1875 (92.1875)  Acc@5: 100.0000 (100.0000)
Valid: 31 [  50/390]  Loss: 0.1559 (0.399)  Acc@1: 95.3125 (86.7341)  Acc@5: 100.0000 (99.3566)
Valid: 31 [ 100/390]  Loss: 0.5784 (0.391)  Acc@1: 84.3750 (87.2679)  Acc@5: 96.8750 (99.4276)
Valid: 31 [ 150/390]  Loss: 0.4452 (0.383)  Acc@1: 84.3750 (87.5310)  Acc@5: 100.0000 (99.4309)
Valid: 31 [ 200/390]  Loss: 0.5335 (0.385)  Acc@1: 82.8125 (87.4378)  Acc@5: 98.4375 (99.4481)
Valid: 31 [ 250/390]  Loss: 0.3082 (0.383)  Acc@1: 89.0625 (87.6681)  Acc@5: 100.0000 (99.4584)
Valid: 31 [ 300/390]  Loss: 0.2842 (0.383)  Acc@1: 89.0625 (87.5623)  Acc@5: 100.0000 (99.4238)
Valid: 31 [ 350/390]  Loss: 0.5776 (0.386)  Acc@1: 84.3750 (87.5089)  Acc@5: 98.4375 (99.4035)
Valid: 31 [ 390/390]  Loss: 0.1511 (0.382)  Acc@1: 97.5000 (87.5800)  Acc@5: 100.0000 (99.4120)
valid_acc 87.580000
epoch = 31   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('sep_conv_3x3', 2), ('dil_conv_3x3', 3), ('skip_connect', 2)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1394, 0.0632, 0.0506, 0.0852, 0.2173, 0.1699, 0.1486, 0.1259],
        [0.2099, 0.0481, 0.0441, 0.0668, 0.1893, 0.1418, 0.1541, 0.1459],
        [0.2133, 0.0768, 0.0561, 0.0988, 0.1326, 0.1378, 0.1553, 0.1293],
        [0.2327, 0.0546, 0.0489, 0.0754, 0.1804, 0.1499, 0.1244, 0.1338],
        [0.2834, 0.0485, 0.0399, 0.0801, 0.1359, 0.1355, 0.1398, 0.1369],
        [0.2448, 0.0722, 0.0586, 0.0969, 0.1483, 0.1376, 0.1235, 0.1181],
        [0.2457, 0.0525, 0.0503, 0.0756, 0.1559, 0.1929, 0.1110, 0.1160],
        [0.3115, 0.0495, 0.0418, 0.0822, 0.1363, 0.1252, 0.1230, 0.1306],
        [0.3559, 0.0389, 0.0369, 0.0562, 0.1261, 0.1171, 0.1307, 0.1382],
        [0.3306, 0.0662, 0.0541, 0.0883, 0.1316, 0.1084, 0.1091, 0.1117],
        [0.3285, 0.0493, 0.0467, 0.0719, 0.1686, 0.0987, 0.1196, 0.1167],
        [0.4138, 0.0441, 0.0391, 0.0785, 0.1002, 0.0896, 0.1156, 0.1191],
        [0.4997, 0.0340, 0.0331, 0.0496, 0.0883, 0.0863, 0.0988, 0.1102],
        [0.4852, 0.0308, 0.0292, 0.0389, 0.0958, 0.0966, 0.0949, 0.1285]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.0968, 0.1687, 0.1350, 0.1048, 0.1352, 0.1258, 0.1236, 0.1102],
        [0.1447, 0.1288, 0.1195, 0.1228, 0.1139, 0.1215, 0.1243, 0.1245],
        [0.0991, 0.1561, 0.1333, 0.1167, 0.1090, 0.1442, 0.1361, 0.1056],
        [0.1284, 0.1396, 0.1335, 0.1268, 0.1026, 0.1209, 0.1498, 0.0984],
        [0.1429, 0.0971, 0.0858, 0.1352, 0.1258, 0.1241, 0.1348, 0.1543],
        [0.0977, 0.1594, 0.1288, 0.1401, 0.1137, 0.1302, 0.1233, 0.1067],
        [0.1149, 0.1391, 0.1344, 0.1320, 0.1149, 0.1052, 0.1231, 0.1364],
        [0.1324, 0.0963, 0.0901, 0.1425, 0.1545, 0.1300, 0.1254, 0.1287],
        [0.1504, 0.0861, 0.0882, 0.1353, 0.1253, 0.1176, 0.1454, 0.1518],
        [0.1056, 0.1509, 0.1305, 0.1154, 0.1234, 0.1359, 0.1202, 0.1181],
        [0.1179, 0.1523, 0.1441, 0.1084, 0.1211, 0.1233, 0.1208, 0.1121],
        [0.1478, 0.0911, 0.0892, 0.1575, 0.1322, 0.1209, 0.1127, 0.1485],
        [0.1555, 0.0841, 0.0859, 0.1452, 0.1304, 0.1181, 0.1583, 0.1223],
        [0.1656, 0.0804, 0.0830, 0.1359, 0.1270, 0.1345, 0.1356, 0.1379]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 32 [   0/390]  Loss: 0.1848 (0.185)  Acc@1: 93.7500 (93.7500)  Acc@5: 100.0000 (100.0000)LR: 7.891e-03
Train: 32 [  50/390]  Loss: 0.2396 (0.238)  Acc@1: 89.0625 (91.6054)  Acc@5: 100.0000 (99.8162)LR: 7.891e-03
Train: 32 [ 100/390]  Loss: 0.2260 (0.228)  Acc@1: 87.5000 (92.0483)  Acc@5: 100.0000 (99.8144)LR: 7.891e-03
Train: 32 [ 150/390]  Loss: 0.1539 (0.226)  Acc@1: 95.3125 (91.9599)  Acc@5: 100.0000 (99.8448)LR: 7.891e-03
Train: 32 [ 200/390]  Loss: 0.1506 (0.227)  Acc@1: 93.7500 (91.9232)  Acc@5: 100.0000 (99.8523)LR: 7.891e-03
Train: 32 [ 250/390]  Loss: 0.1428 (0.228)  Acc@1: 95.3125 (91.8949)  Acc@5: 100.0000 (99.8630)LR: 7.891e-03
Train: 32 [ 300/390]  Loss: 0.2657 (0.231)  Acc@1: 90.6250 (91.8397)  Acc@5: 100.0000 (99.8598)LR: 7.891e-03
Train: 32 [ 350/390]  Loss: 0.2541 (0.233)  Acc@1: 87.5000 (91.7201)  Acc@5: 100.0000 (99.8665)LR: 7.891e-03
Train: 32 [ 390/390]  Loss: 0.2600 (0.233)  Acc@1: 87.5000 (91.7320)  Acc@5: 100.0000 (99.8720)LR: 7.891e-03
train_acc 91.732000
Valid: 32 [   0/390]  Loss: 0.2947 (0.295)  Acc@1: 87.5000 (87.5000)  Acc@5: 100.0000 (100.0000)
Valid: 32 [  50/390]  Loss: 0.3498 (0.412)  Acc@1: 87.5000 (87.0404)  Acc@5: 98.4375 (99.3260)
Valid: 32 [ 100/390]  Loss: 0.3787 (0.407)  Acc@1: 85.9375 (86.9895)  Acc@5: 100.0000 (99.3193)
Valid: 32 [ 150/390]  Loss: 0.2415 (0.410)  Acc@1: 89.0625 (86.7550)  Acc@5: 100.0000 (99.3998)
Valid: 32 [ 200/390]  Loss: 0.2512 (0.410)  Acc@1: 85.9375 (86.7149)  Acc@5: 100.0000 (99.3937)
Valid: 32 [ 250/390]  Loss: 0.5656 (0.413)  Acc@1: 85.9375 (86.7468)  Acc@5: 98.4375 (99.3588)
Valid: 32 [ 300/390]  Loss: 0.4814 (0.408)  Acc@1: 90.6250 (86.8148)  Acc@5: 100.0000 (99.3511)
Valid: 32 [ 350/390]  Loss: 0.3337 (0.405)  Acc@1: 89.0625 (86.8857)  Acc@5: 98.4375 (99.3812)
Valid: 32 [ 390/390]  Loss: 0.4220 (0.405)  Acc@1: 87.5000 (86.8520)  Acc@5: 100.0000 (99.3880)
valid_acc 86.852000
epoch = 32   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('sep_conv_3x3', 2), ('skip_connect', 2), ('dil_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1395, 0.0618, 0.0501, 0.0854, 0.2199, 0.1712, 0.1475, 0.1247],
        [0.2147, 0.0465, 0.0429, 0.0652, 0.1897, 0.1408, 0.1535, 0.1467],
        [0.2182, 0.0752, 0.0554, 0.0991, 0.1326, 0.1363, 0.1555, 0.1277],
        [0.2393, 0.0534, 0.0483, 0.0753, 0.1803, 0.1468, 0.1237, 0.1329],
        [0.2912, 0.0469, 0.0389, 0.0789, 0.1351, 0.1342, 0.1387, 0.1362],
        [0.2502, 0.0712, 0.0582, 0.0974, 0.1478, 0.1364, 0.1230, 0.1158],
        [0.2538, 0.0512, 0.0495, 0.0750, 0.1551, 0.1900, 0.1106, 0.1148],
        [0.3203, 0.0480, 0.0409, 0.0809, 0.1347, 0.1236, 0.1216, 0.1301],
        [0.3706, 0.0376, 0.0360, 0.0552, 0.1224, 0.1135, 0.1287, 0.1359],
        [0.3400, 0.0644, 0.0533, 0.0880, 0.1312, 0.1063, 0.1062, 0.1107],
        [0.3425, 0.0477, 0.0456, 0.0709, 0.1662, 0.0975, 0.1169, 0.1127],
        [0.4271, 0.0427, 0.0381, 0.0768, 0.0977, 0.0864, 0.1132, 0.1181],
        [0.5168, 0.0324, 0.0319, 0.0478, 0.0852, 0.0833, 0.0961, 0.1064],
        [0.5029, 0.0295, 0.0283, 0.0377, 0.0925, 0.0937, 0.0915, 0.1239]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.0960, 0.1707, 0.1374, 0.1045, 0.1352, 0.1247, 0.1226, 0.1089],
        [0.1457, 0.1268, 0.1189, 0.1241, 0.1148, 0.1220, 0.1244, 0.1233],
        [0.0973, 0.1562, 0.1350, 0.1176, 0.1086, 0.1449, 0.1367, 0.1037],
        [0.1300, 0.1378, 0.1336, 0.1263, 0.1021, 0.1210, 0.1507, 0.0984],
        [0.1423, 0.0955, 0.0857, 0.1357, 0.1256, 0.1252, 0.1343, 0.1558],
        [0.0962, 0.1592, 0.1305, 0.1400, 0.1135, 0.1313, 0.1234, 0.1060],
        [0.1162, 0.1367, 0.1338, 0.1336, 0.1145, 0.1046, 0.1229, 0.1376],
        [0.1330, 0.0950, 0.0897, 0.1434, 0.1542, 0.1295, 0.1253, 0.1298],
        [0.1512, 0.0843, 0.0878, 0.1361, 0.1242, 0.1169, 0.1468, 0.1527],
        [0.1042, 0.1508, 0.1319, 0.1149, 0.1234, 0.1356, 0.1213, 0.1179],
        [0.1191, 0.1506, 0.1439, 0.1074, 0.1215, 0.1235, 0.1213, 0.1126],
        [0.1480, 0.0894, 0.0890, 0.1593, 0.1322, 0.1202, 0.1122, 0.1496],
        [0.1561, 0.0818, 0.0852, 0.1460, 0.1321, 0.1177, 0.1583, 0.1228],
        [0.1675, 0.0786, 0.0821, 0.1358, 0.1267, 0.1357, 0.1356, 0.1381]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 33 [   0/390]  Loss: 0.2482 (0.248)  Acc@1: 92.1875 (92.1875)  Acc@5: 100.0000 (100.0000)LR: 7.219e-03
Train: 33 [  50/390]  Loss: 0.2200 (0.221)  Acc@1: 90.6250 (92.4632)  Acc@5: 98.4375 (99.8775)LR: 7.219e-03
Train: 33 [ 100/390]  Loss: 0.3452 (0.221)  Acc@1: 87.5000 (92.2958)  Acc@5: 100.0000 (99.9381)LR: 7.219e-03
Train: 33 [ 150/390]  Loss: 0.1781 (0.222)  Acc@1: 93.7500 (92.1772)  Acc@5: 100.0000 (99.9172)LR: 7.219e-03
Train: 33 [ 200/390]  Loss: 0.1592 (0.224)  Acc@1: 95.3125 (92.0787)  Acc@5: 100.0000 (99.8989)LR: 7.219e-03
Train: 33 [ 250/390]  Loss: 0.2435 (0.226)  Acc@1: 92.1875 (92.0630)  Acc@5: 100.0000 (99.8817)LR: 7.219e-03
Train: 33 [ 300/390]  Loss: 0.1381 (0.225)  Acc@1: 95.3125 (92.1096)  Acc@5: 100.0000 (99.8702)LR: 7.219e-03
Train: 33 [ 350/390]  Loss: 0.2365 (0.228)  Acc@1: 90.6250 (91.9694)  Acc@5: 100.0000 (99.8531)LR: 7.219e-03
Train: 33 [ 390/390]  Loss: 0.2089 (0.228)  Acc@1: 90.0000 (91.9120)  Acc@5: 100.0000 (99.8560)LR: 7.219e-03
train_acc 91.912000
Valid: 33 [   0/390]  Loss: 0.2397 (0.240)  Acc@1: 93.7500 (93.7500)  Acc@5: 100.0000 (100.0000)
Valid: 33 [  50/390]  Loss: 0.6790 (0.382)  Acc@1: 82.8125 (87.4694)  Acc@5: 100.0000 (99.5098)
Valid: 33 [ 100/390]  Loss: 0.5298 (0.388)  Acc@1: 87.5000 (87.4845)  Acc@5: 96.8750 (99.3502)
Valid: 33 [ 150/390]  Loss: 0.2344 (0.399)  Acc@1: 93.7500 (86.9723)  Acc@5: 100.0000 (99.4102)
Valid: 33 [ 200/390]  Loss: 0.3500 (0.399)  Acc@1: 89.0625 (86.9092)  Acc@5: 98.4375 (99.4481)
Valid: 33 [ 250/390]  Loss: 0.3401 (0.401)  Acc@1: 89.0625 (86.9958)  Acc@5: 100.0000 (99.4460)
Valid: 33 [ 300/390]  Loss: 0.3213 (0.399)  Acc@1: 92.1875 (87.0484)  Acc@5: 98.4375 (99.4446)
Valid: 33 [ 350/390]  Loss: 0.2787 (0.398)  Acc@1: 93.7500 (87.0326)  Acc@5: 100.0000 (99.4658)
Valid: 33 [ 390/390]  Loss: 0.3686 (0.399)  Acc@1: 90.0000 (87.0360)  Acc@5: 100.0000 (99.4360)
valid_acc 87.036000
epoch = 33   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('max_pool_3x3', 0), ('dil_conv_5x5', 3), ('skip_connect', 2), ('dil_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1398, 0.0597, 0.0489, 0.0842, 0.2229, 0.1728, 0.1474, 0.1242],
        [0.2186, 0.0451, 0.0419, 0.0642, 0.1892, 0.1391, 0.1549, 0.1470],
        [0.2241, 0.0733, 0.0540, 0.0978, 0.1321, 0.1350, 0.1566, 0.1272],
        [0.2467, 0.0522, 0.0473, 0.0744, 0.1792, 0.1454, 0.1231, 0.1318],
        [0.2974, 0.0455, 0.0380, 0.0779, 0.1342, 0.1333, 0.1380, 0.1357],
        [0.2583, 0.0693, 0.0570, 0.0964, 0.1477, 0.1360, 0.1217, 0.1138],
        [0.2623, 0.0500, 0.0486, 0.0744, 0.1552, 0.1863, 0.1099, 0.1133],
        [0.3328, 0.0465, 0.0399, 0.0801, 0.1334, 0.1204, 0.1190, 0.1279],
        [0.3825, 0.0363, 0.0349, 0.0537, 0.1200, 0.1112, 0.1269, 0.1345],
        [0.3531, 0.0625, 0.0519, 0.0864, 0.1288, 0.1037, 0.1044, 0.1092],
        [0.3554, 0.0464, 0.0444, 0.0694, 0.1626, 0.0967, 0.1150, 0.1101],
        [0.4426, 0.0412, 0.0372, 0.0754, 0.0943, 0.0823, 0.1104, 0.1166],
        [0.5352, 0.0310, 0.0305, 0.0459, 0.0820, 0.0802, 0.0936, 0.1016],
        [0.5222, 0.0281, 0.0271, 0.0362, 0.0885, 0.0902, 0.0877, 0.1201]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.0958, 0.1718, 0.1381, 0.1038, 0.1341, 0.1245, 0.1227, 0.1092],
        [0.1459, 0.1261, 0.1186, 0.1249, 0.1144, 0.1223, 0.1243, 0.1234],
        [0.0961, 0.1550, 0.1349, 0.1182, 0.1088, 0.1474, 0.1366, 0.1030],
        [0.1307, 0.1370, 0.1340, 0.1265, 0.1014, 0.1208, 0.1523, 0.0973],
        [0.1421, 0.0934, 0.0850, 0.1352, 0.1266, 0.1265, 0.1339, 0.1574],
        [0.0961, 0.1582, 0.1297, 0.1402, 0.1136, 0.1317, 0.1241, 0.1064],
        [0.1167, 0.1367, 0.1344, 0.1338, 0.1139, 0.1040, 0.1229, 0.1377],
        [0.1335, 0.0936, 0.0890, 0.1434, 0.1534, 0.1304, 0.1263, 0.1304],
        [0.1499, 0.0827, 0.0875, 0.1364, 0.1239, 0.1175, 0.1476, 0.1544],
        [0.1047, 0.1496, 0.1316, 0.1157, 0.1242, 0.1342, 0.1226, 0.1174],
        [0.1191, 0.1502, 0.1443, 0.1074, 0.1219, 0.1233, 0.1208, 0.1130],
        [0.1502, 0.0873, 0.0878, 0.1603, 0.1315, 0.1204, 0.1131, 0.1493],
        [0.1558, 0.0797, 0.0843, 0.1461, 0.1338, 0.1192, 0.1592, 0.1219],
        [0.1687, 0.0768, 0.0813, 0.1358, 0.1275, 0.1367, 0.1358, 0.1374]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 34 [   0/390]  Loss: 0.1426 (0.143)  Acc@1: 95.3125 (95.3125)  Acc@5: 100.0000 (100.0000)LR: 6.570e-03
Train: 34 [  50/390]  Loss: 0.2088 (0.230)  Acc@1: 92.1875 (92.3100)  Acc@5: 100.0000 (99.7855)LR: 6.570e-03
Train: 34 [ 100/390]  Loss: 0.1644 (0.224)  Acc@1: 95.3125 (92.2184)  Acc@5: 100.0000 (99.8298)LR: 6.570e-03
Train: 34 [ 150/390]  Loss: 0.3158 (0.227)  Acc@1: 89.0625 (92.0840)  Acc@5: 98.4375 (99.8344)LR: 6.570e-03
Train: 34 [ 200/390]  Loss: 0.2471 (0.226)  Acc@1: 92.1875 (92.0709)  Acc@5: 100.0000 (99.8523)LR: 6.570e-03
Train: 34 [ 250/390]  Loss: 0.1384 (0.220)  Acc@1: 92.1875 (92.3743)  Acc@5: 100.0000 (99.8506)LR: 6.570e-03
Train: 34 [ 300/390]  Loss: 0.2368 (0.220)  Acc@1: 87.5000 (92.2757)  Acc@5: 100.0000 (99.8547)LR: 6.570e-03
Train: 34 [ 350/390]  Loss: 0.5405 (0.219)  Acc@1: 84.3750 (92.3389)  Acc@5: 100.0000 (99.8442)LR: 6.570e-03
Train: 34 [ 390/390]  Loss: 0.2918 (0.219)  Acc@1: 85.0000 (92.3000)  Acc@5: 100.0000 (99.8520)LR: 6.570e-03
train_acc 92.300000
Valid: 34 [   0/390]  Loss: 0.2977 (0.298)  Acc@1: 92.1875 (92.1875)  Acc@5: 100.0000 (100.0000)
Valid: 34 [  50/390]  Loss: 0.5947 (0.383)  Acc@1: 79.6875 (87.8676)  Acc@5: 100.0000 (99.4485)
Valid: 34 [ 100/390]  Loss: 0.6826 (0.392)  Acc@1: 82.8125 (87.5619)  Acc@5: 100.0000 (99.5514)
Valid: 34 [ 150/390]  Loss: 0.3864 (0.397)  Acc@1: 89.0625 (87.4276)  Acc@5: 100.0000 (99.5033)
Valid: 34 [ 200/390]  Loss: 0.3214 (0.393)  Acc@1: 89.0625 (87.3756)  Acc@5: 100.0000 (99.5258)
Valid: 34 [ 250/390]  Loss: 0.2747 (0.392)  Acc@1: 92.1875 (87.3879)  Acc@5: 100.0000 (99.5331)
Valid: 34 [ 300/390]  Loss: 0.3083 (0.394)  Acc@1: 87.5000 (87.3650)  Acc@5: 100.0000 (99.5328)
Valid: 34 [ 350/390]  Loss: 0.5891 (0.393)  Acc@1: 79.6875 (87.4733)  Acc@5: 98.4375 (99.5103)
Valid: 34 [ 390/390]  Loss: 0.1940 (0.393)  Acc@1: 92.5000 (87.5240)  Acc@5: 100.0000 (99.4960)
valid_acc 87.524000
epoch = 34   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('max_pool_3x3', 0), ('dil_conv_5x5', 3), ('skip_connect', 2), ('dil_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1393, 0.0586, 0.0483, 0.0838, 0.2251, 0.1747, 0.1472, 0.1229],
        [0.2242, 0.0437, 0.0405, 0.0625, 0.1905, 0.1364, 0.1549, 0.1474],
        [0.2272, 0.0720, 0.0532, 0.0975, 0.1319, 0.1345, 0.1577, 0.1259],
        [0.2543, 0.0509, 0.0463, 0.0736, 0.1788, 0.1430, 0.1213, 0.1318],
        [0.3065, 0.0443, 0.0374, 0.0772, 0.1336, 0.1311, 0.1350, 0.1349],
        [0.2640, 0.0686, 0.0562, 0.0964, 0.1477, 0.1356, 0.1203, 0.1112],
        [0.2741, 0.0486, 0.0473, 0.0732, 0.1533, 0.1836, 0.1086, 0.1114],
        [0.3448, 0.0451, 0.0390, 0.0789, 0.1327, 0.1173, 0.1164, 0.1257],
        [0.3975, 0.0351, 0.0339, 0.0525, 0.1161, 0.1078, 0.1248, 0.1324],
        [0.3628, 0.0615, 0.0511, 0.0863, 0.1270, 0.1011, 0.1017, 0.1084],
        [0.3724, 0.0449, 0.0430, 0.0679, 0.1569, 0.0952, 0.1126, 0.1070],
        [0.4579, 0.0399, 0.0363, 0.0743, 0.0911, 0.0795, 0.1073, 0.1137],
        [0.5548, 0.0296, 0.0293, 0.0441, 0.0780, 0.0772, 0.0894, 0.0976],
        [0.5438, 0.0267, 0.0259, 0.0346, 0.0842, 0.0866, 0.0829, 0.1154]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.0952, 0.1732, 0.1397, 0.1035, 0.1342, 0.1231, 0.1235, 0.1077],
        [0.1460, 0.1267, 0.1192, 0.1261, 0.1132, 0.1223, 0.1240, 0.1227],
        [0.0954, 0.1555, 0.1368, 0.1193, 0.1079, 0.1472, 0.1367, 0.1010],
        [0.1293, 0.1371, 0.1343, 0.1264, 0.1013, 0.1206, 0.1540, 0.0970],
        [0.1431, 0.0920, 0.0848, 0.1359, 0.1282, 0.1249, 0.1336, 0.1576],
        [0.0952, 0.1585, 0.1306, 0.1399, 0.1129, 0.1316, 0.1244, 0.1070],
        [0.1169, 0.1365, 0.1344, 0.1354, 0.1125, 0.1037, 0.1225, 0.1381],
        [0.1334, 0.0925, 0.0887, 0.1442, 0.1534, 0.1305, 0.1265, 0.1308],
        [0.1492, 0.0822, 0.0873, 0.1371, 0.1238, 0.1177, 0.1478, 0.1549],
        [0.1048, 0.1506, 0.1329, 0.1153, 0.1241, 0.1338, 0.1208, 0.1175],
        [0.1184, 0.1512, 0.1452, 0.1073, 0.1220, 0.1233, 0.1200, 0.1127],
        [0.1531, 0.0861, 0.0872, 0.1630, 0.1302, 0.1177, 0.1140, 0.1487],
        [0.1573, 0.0789, 0.0838, 0.1474, 0.1340, 0.1187, 0.1595, 0.1205],
        [0.1709, 0.0757, 0.0810, 0.1372, 0.1278, 0.1352, 0.1352, 0.1370]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 35 [   0/390]  Loss: 0.2341 (0.234)  Acc@1: 92.1875 (92.1875)  Acc@5: 100.0000 (100.0000)LR: 5.947e-03
Train: 35 [  50/390]  Loss: 0.2606 (0.206)  Acc@1: 92.1875 (92.5551)  Acc@5: 100.0000 (99.8468)LR: 5.947e-03
Train: 35 [ 100/390]  Loss: 0.1753 (0.205)  Acc@1: 92.1875 (92.6207)  Acc@5: 100.0000 (99.8917)LR: 5.947e-03
Train: 35 [ 150/390]  Loss: 0.2301 (0.202)  Acc@1: 93.7500 (92.8498)  Acc@5: 100.0000 (99.8862)LR: 5.947e-03
Train: 35 [ 200/390]  Loss: 0.1580 (0.203)  Acc@1: 89.0625 (92.8638)  Acc@5: 100.0000 (99.8678)LR: 5.947e-03
Train: 35 [ 250/390]  Loss: 0.1158 (0.204)  Acc@1: 96.8750 (92.8474)  Acc@5: 100.0000 (99.8693)LR: 5.947e-03
Train: 35 [ 300/390]  Loss: 0.1309 (0.203)  Acc@1: 93.7500 (92.8935)  Acc@5: 100.0000 (99.8858)LR: 5.947e-03
Train: 35 [ 350/390]  Loss: 0.1998 (0.207)  Acc@1: 93.7500 (92.7484)  Acc@5: 100.0000 (99.8709)LR: 5.947e-03
Train: 35 [ 390/390]  Loss: 0.2207 (0.210)  Acc@1: 92.5000 (92.6760)  Acc@5: 100.0000 (99.8520)LR: 5.947e-03
train_acc 92.676000
Valid: 35 [   0/390]  Loss: 0.4710 (0.471)  Acc@1: 82.8125 (82.8125)  Acc@5: 98.4375 (98.4375)
Valid: 35 [  50/390]  Loss: 0.4821 (0.379)  Acc@1: 84.3750 (87.7451)  Acc@5: 100.0000 (99.5098)
Valid: 35 [ 100/390]  Loss: 0.5164 (0.372)  Acc@1: 82.8125 (87.8713)  Acc@5: 98.4375 (99.5204)
Valid: 35 [ 150/390]  Loss: 0.2236 (0.378)  Acc@1: 92.1875 (87.7276)  Acc@5: 100.0000 (99.5550)
Valid: 35 [ 200/390]  Loss: 0.3173 (0.380)  Acc@1: 90.6250 (87.6166)  Acc@5: 98.4375 (99.5258)
Valid: 35 [ 250/390]  Loss: 0.2658 (0.382)  Acc@1: 90.6250 (87.5560)  Acc@5: 100.0000 (99.5020)
Valid: 35 [ 300/390]  Loss: 0.4317 (0.391)  Acc@1: 85.9375 (87.3443)  Acc@5: 100.0000 (99.5069)
Valid: 35 [ 350/390]  Loss: 0.2495 (0.386)  Acc@1: 90.6250 (87.4955)  Acc@5: 100.0000 (99.5059)
Valid: 35 [ 390/390]  Loss: 0.3060 (0.386)  Acc@1: 90.0000 (87.5760)  Acc@5: 97.5000 (99.5160)
valid_acc 87.576000
epoch = 35   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('dil_conv_5x5', 2), ('dil_conv_3x3', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 3), ('skip_connect', 2), ('dil_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1391, 0.0573, 0.0476, 0.0836, 0.2274, 0.1747, 0.1478, 0.1225],
        [0.2296, 0.0424, 0.0397, 0.0615, 0.1897, 0.1343, 0.1552, 0.1476],
        [0.2322, 0.0709, 0.0525, 0.0974, 0.1314, 0.1326, 0.1585, 0.1246],
        [0.2613, 0.0500, 0.0457, 0.0734, 0.1791, 0.1400, 0.1195, 0.1309],
        [0.3155, 0.0430, 0.0365, 0.0761, 0.1331, 0.1286, 0.1333, 0.1339],
        [0.2700, 0.0676, 0.0556, 0.0965, 0.1478, 0.1342, 0.1193, 0.1089],
        [0.2839, 0.0476, 0.0467, 0.0726, 0.1521, 0.1812, 0.1069, 0.1091],
        [0.3548, 0.0437, 0.0381, 0.0779, 0.1322, 0.1154, 0.1143, 0.1237],
        [0.4120, 0.0338, 0.0329, 0.0512, 0.1134, 0.1046, 0.1225, 0.1296],
        [0.3752, 0.0600, 0.0499, 0.0853, 0.1257, 0.0986, 0.0984, 0.1069],
        [0.3898, 0.0437, 0.0419, 0.0668, 0.1524, 0.0925, 0.1097, 0.1032],
        [0.4750, 0.0382, 0.0351, 0.0724, 0.0877, 0.0769, 0.1043, 0.1105],
        [0.5744, 0.0281, 0.0279, 0.0420, 0.0746, 0.0742, 0.0857, 0.0930],
        [0.5626, 0.0254, 0.0248, 0.0332, 0.0802, 0.0826, 0.0809, 0.1103]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.0952, 0.1732, 0.1392, 0.1037, 0.1341, 0.1231, 0.1249, 0.1066],
        [0.1456, 0.1268, 0.1187, 0.1266, 0.1128, 0.1217, 0.1245, 0.1233],
        [0.0947, 0.1541, 0.1358, 0.1213, 0.1076, 0.1504, 0.1364, 0.0997],
        [0.1291, 0.1373, 0.1344, 0.1274, 0.1004, 0.1201, 0.1546, 0.0967],
        [0.1440, 0.0904, 0.0846, 0.1367, 0.1288, 0.1244, 0.1325, 0.1585],
        [0.0948, 0.1589, 0.1308, 0.1399, 0.1127, 0.1323, 0.1237, 0.1069],
        [0.1176, 0.1369, 0.1348, 0.1348, 0.1126, 0.1022, 0.1219, 0.1392],
        [0.1332, 0.0911, 0.0884, 0.1457, 0.1539, 0.1310, 0.1263, 0.1305],
        [0.1502, 0.0809, 0.0867, 0.1378, 0.1233, 0.1184, 0.1480, 0.1548],
        [0.1050, 0.1497, 0.1320, 0.1154, 0.1257, 0.1338, 0.1207, 0.1177],
        [0.1182, 0.1509, 0.1452, 0.1076, 0.1219, 0.1231, 0.1207, 0.1123],
        [0.1551, 0.0842, 0.0867, 0.1651, 0.1302, 0.1164, 0.1152, 0.1471],
        [0.1560, 0.0774, 0.0829, 0.1471, 0.1365, 0.1195, 0.1607, 0.1199],
        [0.1715, 0.0740, 0.0799, 0.1366, 0.1282, 0.1360, 0.1347, 0.1390]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 36 [   0/390]  Loss: 0.1713 (0.171)  Acc@1: 95.3125 (95.3125)  Acc@5: 100.0000 (100.0000)LR: 5.351e-03
Train: 36 [  50/390]  Loss: 0.2190 (0.200)  Acc@1: 90.6250 (92.8922)  Acc@5: 100.0000 (100.0000)LR: 5.351e-03
Train: 36 [ 100/390]  Loss: 0.1973 (0.199)  Acc@1: 92.1875 (93.1312)  Acc@5: 100.0000 (99.9072)LR: 5.351e-03
Train: 36 [ 150/390]  Loss: 0.2177 (0.204)  Acc@1: 92.1875 (92.9118)  Acc@5: 100.0000 (99.9172)LR: 5.351e-03
Train: 36 [ 200/390]  Loss: 0.2377 (0.200)  Acc@1: 92.1875 (93.0348)  Acc@5: 100.0000 (99.9300)LR: 5.351e-03
Train: 36 [ 250/390]  Loss: 0.2550 (0.201)  Acc@1: 93.7500 (92.9968)  Acc@5: 100.0000 (99.9191)LR: 5.351e-03
Train: 36 [ 300/390]  Loss: 0.5543 (0.202)  Acc@1: 79.6875 (93.0181)  Acc@5: 98.4375 (99.8962)LR: 5.351e-03
Train: 36 [ 350/390]  Loss: 0.2459 (0.202)  Acc@1: 92.1875 (92.9443)  Acc@5: 98.4375 (99.8932)LR: 5.351e-03
Train: 36 [ 390/390]  Loss: 0.1968 (0.203)  Acc@1: 90.0000 (92.9000)  Acc@5: 100.0000 (99.8680)LR: 5.351e-03
train_acc 92.900000
Valid: 36 [   0/390]  Loss: 0.2325 (0.233)  Acc@1: 93.7500 (93.7500)  Acc@5: 100.0000 (100.0000)
Valid: 36 [  50/390]  Loss: 0.6124 (0.377)  Acc@1: 79.6875 (87.8064)  Acc@5: 98.4375 (99.4179)
Valid: 36 [ 100/390]  Loss: 0.5054 (0.398)  Acc@1: 81.2500 (86.8967)  Acc@5: 100.0000 (99.3812)
Valid: 36 [ 150/390]  Loss: 0.4277 (0.387)  Acc@1: 87.5000 (87.2827)  Acc@5: 98.4375 (99.4516)
Valid: 36 [ 200/390]  Loss: 0.4913 (0.388)  Acc@1: 84.3750 (87.2901)  Acc@5: 100.0000 (99.4403)
Valid: 36 [ 250/390]  Loss: 0.3583 (0.384)  Acc@1: 87.5000 (87.3008)  Acc@5: 100.0000 (99.4833)
Valid: 36 [ 300/390]  Loss: 0.3815 (0.380)  Acc@1: 85.9375 (87.3962)  Acc@5: 96.8750 (99.5017)
Valid: 36 [ 350/390]  Loss: 0.6677 (0.380)  Acc@1: 78.1250 (87.4065)  Acc@5: 98.4375 (99.5014)
Valid: 36 [ 390/390]  Loss: 0.1675 (0.379)  Acc@1: 92.5000 (87.4040)  Acc@5: 100.0000 (99.5120)
valid_acc 87.404000
epoch = 36   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('dil_conv_3x3', 1), ('dil_conv_5x5', 2), ('dil_conv_3x3', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 3), ('skip_connect', 2), ('dil_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1386, 0.0554, 0.0466, 0.0822, 0.2312, 0.1762, 0.1485, 0.1213],
        [0.2355, 0.0409, 0.0384, 0.0598, 0.1890, 0.1323, 0.1547, 0.1494],
        [0.2371, 0.0688, 0.0515, 0.0966, 0.1313, 0.1325, 0.1588, 0.1234],
        [0.2691, 0.0484, 0.0444, 0.0719, 0.1798, 0.1380, 0.1184, 0.1300],
        [0.3236, 0.0415, 0.0354, 0.0745, 0.1330, 0.1266, 0.1317, 0.1337],
        [0.2777, 0.0659, 0.0548, 0.0963, 0.1473, 0.1320, 0.1179, 0.1080],
        [0.2938, 0.0459, 0.0452, 0.0708, 0.1520, 0.1787, 0.1064, 0.1072],
        [0.3673, 0.0420, 0.0371, 0.0768, 0.1291, 0.1138, 0.1117, 0.1222],
        [0.4289, 0.0326, 0.0320, 0.0499, 0.1100, 0.1011, 0.1190, 0.1267],
        [0.3891, 0.0581, 0.0489, 0.0845, 0.1223, 0.0956, 0.0960, 0.1054],
        [0.4080, 0.0422, 0.0406, 0.0654, 0.1466, 0.0909, 0.1068, 0.0995],
        [0.4911, 0.0370, 0.0339, 0.0703, 0.0850, 0.0743, 0.1018, 0.1066],
        [0.5929, 0.0268, 0.0268, 0.0403, 0.0708, 0.0714, 0.0820, 0.0890],
        [0.5826, 0.0242, 0.0237, 0.0318, 0.0762, 0.0785, 0.0776, 0.1053]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.0947, 0.1733, 0.1402, 0.1037, 0.1338, 0.1217, 0.1255, 0.1069],
        [0.1457, 0.1258, 0.1180, 0.1263, 0.1129, 0.1218, 0.1267, 0.1228],
        [0.0943, 0.1536, 0.1364, 0.1228, 0.1074, 0.1513, 0.1346, 0.0995],
        [0.1280, 0.1374, 0.1347, 0.1284, 0.1003, 0.1196, 0.1560, 0.0956],
        [0.1451, 0.0886, 0.0836, 0.1372, 0.1287, 0.1252, 0.1333, 0.1584],
        [0.0938, 0.1584, 0.1313, 0.1398, 0.1129, 0.1331, 0.1232, 0.1076],
        [0.1179, 0.1365, 0.1345, 0.1353, 0.1120, 0.1012, 0.1224, 0.1401],
        [0.1344, 0.0899, 0.0881, 0.1475, 0.1532, 0.1309, 0.1260, 0.1301],
        [0.1505, 0.0794, 0.0862, 0.1384, 0.1226, 0.1180, 0.1489, 0.1561],
        [0.1042, 0.1483, 0.1320, 0.1157, 0.1274, 0.1339, 0.1210, 0.1175],
        [0.1174, 0.1500, 0.1446, 0.1074, 0.1219, 0.1234, 0.1222, 0.1131],
        [0.1567, 0.0829, 0.0860, 0.1673, 0.1288, 0.1158, 0.1154, 0.1472],
        [0.1573, 0.0759, 0.0823, 0.1483, 0.1374, 0.1201, 0.1594, 0.1194],
        [0.1739, 0.0723, 0.0789, 0.1367, 0.1276, 0.1365, 0.1352, 0.1389]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 37 [   0/390]  Loss: 0.1927 (0.193)  Acc@1: 92.1875 (92.1875)  Acc@5: 100.0000 (100.0000)LR: 4.785e-03
Train: 37 [  50/390]  Loss: 0.1275 (0.178)  Acc@1: 96.8750 (93.5968)  Acc@5: 100.0000 (99.9694)LR: 4.785e-03
Train: 37 [ 100/390]  Loss: 0.1793 (0.184)  Acc@1: 93.7500 (93.4870)  Acc@5: 100.0000 (99.9226)LR: 4.785e-03
Train: 37 [ 150/390]  Loss: 0.2214 (0.182)  Acc@1: 92.1875 (93.5017)  Acc@5: 100.0000 (99.9379)LR: 4.785e-03
Train: 37 [ 200/390]  Loss: 0.3863 (0.187)  Acc@1: 85.9375 (93.4235)  Acc@5: 100.0000 (99.9223)LR: 4.785e-03
Train: 37 [ 250/390]  Loss: 0.2554 (0.189)  Acc@1: 89.0625 (93.4574)  Acc@5: 100.0000 (99.9066)LR: 4.785e-03
Train: 37 [ 300/390]  Loss: 0.2507 (0.189)  Acc@1: 92.1875 (93.5008)  Acc@5: 100.0000 (99.9221)LR: 4.785e-03
Train: 37 [ 350/390]  Loss: 0.2103 (0.195)  Acc@1: 92.1875 (93.2959)  Acc@5: 100.0000 (99.9199)LR: 4.785e-03
Train: 37 [ 390/390]  Loss: 0.6793 (0.199)  Acc@1: 77.5000 (93.0800)  Acc@5: 100.0000 (99.9040)LR: 4.785e-03
train_acc 93.080000
Valid: 37 [   0/390]  Loss: 0.2423 (0.242)  Acc@1: 93.7500 (93.7500)  Acc@5: 98.4375 (98.4375)
Valid: 37 [  50/390]  Loss: 0.6932 (0.397)  Acc@1: 76.5625 (87.5306)  Acc@5: 98.4375 (99.2953)
Valid: 37 [ 100/390]  Loss: 0.3690 (0.412)  Acc@1: 90.6250 (86.9276)  Acc@5: 100.0000 (99.4121)
Valid: 37 [ 150/390]  Loss: 0.3755 (0.398)  Acc@1: 87.5000 (87.3551)  Acc@5: 100.0000 (99.4619)
Valid: 37 [ 200/390]  Loss: 0.3388 (0.401)  Acc@1: 85.9375 (87.2901)  Acc@5: 100.0000 (99.4248)
Valid: 37 [ 250/390]  Loss: 0.3267 (0.404)  Acc@1: 87.5000 (87.2136)  Acc@5: 100.0000 (99.3775)
Valid: 37 [ 300/390]  Loss: 0.4256 (0.406)  Acc@1: 81.2500 (87.0120)  Acc@5: 100.0000 (99.4134)
Valid: 37 [ 350/390]  Loss: 0.3036 (0.406)  Acc@1: 89.0625 (87.0192)  Acc@5: 100.0000 (99.3857)
Valid: 37 [ 390/390]  Loss: 0.2547 (0.408)  Acc@1: 87.5000 (86.9880)  Acc@5: 100.0000 (99.3840)
valid_acc 86.988000
epoch = 37   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('dil_conv_3x3', 1), ('dil_conv_5x5', 2), ('dil_conv_3x3', 1), ('dil_conv_5x5', 3), ('max_pool_3x3', 0), ('skip_connect', 2), ('dil_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1390, 0.0543, 0.0463, 0.0823, 0.2331, 0.1758, 0.1480, 0.1211],
        [0.2417, 0.0397, 0.0377, 0.0587, 0.1886, 0.1288, 0.1550, 0.1499],
        [0.2420, 0.0674, 0.0508, 0.0959, 0.1308, 0.1318, 0.1588, 0.1225],
        [0.2761, 0.0474, 0.0437, 0.0711, 0.1793, 0.1362, 0.1178, 0.1284],
        [0.3325, 0.0404, 0.0347, 0.0730, 0.1335, 0.1237, 0.1304, 0.1319],
        [0.2833, 0.0650, 0.0542, 0.0960, 0.1472, 0.1300, 0.1180, 0.1063],
        [0.3024, 0.0449, 0.0444, 0.0696, 0.1512, 0.1764, 0.1045, 0.1066],
        [0.3792, 0.0409, 0.0365, 0.0753, 0.1281, 0.1108, 0.1088, 0.1203],
        [0.4425, 0.0317, 0.0313, 0.0489, 0.1069, 0.0983, 0.1156, 0.1248],
        [0.4018, 0.0565, 0.0480, 0.0837, 0.1194, 0.0936, 0.0941, 0.1030],
        [0.4244, 0.0409, 0.0398, 0.0639, 0.1424, 0.0890, 0.1036, 0.0959],
        [0.5073, 0.0355, 0.0328, 0.0679, 0.0821, 0.0719, 0.0988, 0.1037],
        [0.6102, 0.0258, 0.0260, 0.0391, 0.0670, 0.0686, 0.0780, 0.0853],
        [0.5983, 0.0234, 0.0231, 0.0308, 0.0731, 0.0749, 0.0746, 0.1018]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.0946, 0.1719, 0.1405, 0.1037, 0.1348, 0.1220, 0.1252, 0.1072],
        [0.1457, 0.1253, 0.1177, 0.1269, 0.1125, 0.1219, 0.1269, 0.1232],
        [0.0942, 0.1529, 0.1383, 0.1228, 0.1075, 0.1512, 0.1340, 0.0992],
        [0.1271, 0.1367, 0.1352, 0.1287, 0.0996, 0.1210, 0.1565, 0.0953],
        [0.1463, 0.0880, 0.0836, 0.1381, 0.1290, 0.1246, 0.1331, 0.1573],
        [0.0934, 0.1567, 0.1309, 0.1415, 0.1127, 0.1340, 0.1237, 0.1071],
        [0.1179, 0.1348, 0.1336, 0.1365, 0.1127, 0.1016, 0.1221, 0.1407],
        [0.1352, 0.0892, 0.0873, 0.1476, 0.1534, 0.1314, 0.1261, 0.1299],
        [0.1509, 0.0778, 0.0853, 0.1375, 0.1231, 0.1193, 0.1493, 0.1569],
        [0.1042, 0.1463, 0.1318, 0.1164, 0.1286, 0.1336, 0.1216, 0.1175],
        [0.1171, 0.1482, 0.1433, 0.1073, 0.1225, 0.1238, 0.1226, 0.1152],
        [0.1576, 0.0821, 0.0855, 0.1678, 0.1289, 0.1165, 0.1147, 0.1470],
        [0.1582, 0.0746, 0.0820, 0.1482, 0.1386, 0.1208, 0.1586, 0.1191],
        [0.1746, 0.0717, 0.0786, 0.1369, 0.1277, 0.1359, 0.1349, 0.1397]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 38 [   0/390]  Loss: 0.1275 (0.128)  Acc@1: 92.1875 (92.1875)  Acc@5: 100.0000 (100.0000)LR: 4.252e-03
Train: 38 [  50/390]  Loss: 0.1007 (0.204)  Acc@1: 98.4375 (92.5551)  Acc@5: 100.0000 (99.9387)LR: 4.252e-03
Train: 38 [ 100/390]  Loss: 0.1653 (0.195)  Acc@1: 92.1875 (92.6825)  Acc@5: 100.0000 (99.9226)LR: 4.252e-03
Train: 38 [ 150/390]  Loss: 0.2372 (0.192)  Acc@1: 93.7500 (92.9429)  Acc@5: 100.0000 (99.9172)LR: 4.252e-03
Train: 38 [ 200/390]  Loss: 0.1129 (0.191)  Acc@1: 96.8750 (93.0659)  Acc@5: 100.0000 (99.9300)LR: 4.252e-03
Train: 38 [ 250/390]  Loss: 0.1467 (0.199)  Acc@1: 95.3125 (92.7851)  Acc@5: 100.0000 (99.9066)LR: 4.252e-03
Train: 38 [ 300/390]  Loss: 0.1408 (0.199)  Acc@1: 96.8750 (92.8883)  Acc@5: 100.0000 (99.9221)LR: 4.252e-03
Train: 38 [ 350/390]  Loss: 0.2057 (0.198)  Acc@1: 93.7500 (92.9398)  Acc@5: 100.0000 (99.9199)LR: 4.252e-03
Train: 38 [ 390/390]  Loss: 0.2698 (0.199)  Acc@1: 87.5000 (92.9360)  Acc@5: 100.0000 (99.9120)LR: 4.252e-03
train_acc 92.936000
Valid: 38 [   0/390]  Loss: 0.2021 (0.202)  Acc@1: 92.1875 (92.1875)  Acc@5: 100.0000 (100.0000)
Valid: 38 [  50/390]  Loss: 0.2571 (0.359)  Acc@1: 92.1875 (88.4804)  Acc@5: 100.0000 (99.5098)
Valid: 38 [ 100/390]  Loss: 0.6631 (0.368)  Acc@1: 82.8125 (88.2890)  Acc@5: 98.4375 (99.4121)
Valid: 38 [ 150/390]  Loss: 0.2527 (0.363)  Acc@1: 89.0625 (88.3589)  Acc@5: 100.0000 (99.5240)
Valid: 38 [ 200/390]  Loss: 0.5178 (0.366)  Acc@1: 87.5000 (88.2152)  Acc@5: 96.8750 (99.5103)
Valid: 38 [ 250/390]  Loss: 0.2941 (0.359)  Acc@1: 85.9375 (88.5022)  Acc@5: 100.0000 (99.5518)
Valid: 38 [ 300/390]  Loss: 0.4619 (0.366)  Acc@1: 85.9375 (88.2527)  Acc@5: 98.4375 (99.5017)
Valid: 38 [ 350/390]  Loss: 0.09413 (0.366)  Acc@1: 98.4375 (88.1722)  Acc@5: 100.0000 (99.5192)
Valid: 38 [ 390/390]  Loss: 0.4447 (0.366)  Acc@1: 82.5000 (88.1280)  Acc@5: 100.0000 (99.4840)
valid_acc 88.128000
epoch = 38   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('skip_connect', 1), ('dil_conv_5x5', 2), ('dil_conv_3x3', 1), ('dil_conv_5x5', 3), ('max_pool_3x3', 0), ('skip_connect', 2), ('dil_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1390, 0.0528, 0.0455, 0.0816, 0.2391, 0.1758, 0.1466, 0.1195],
        [0.2465, 0.0387, 0.0368, 0.0576, 0.1886, 0.1273, 0.1550, 0.1493],
        [0.2473, 0.0657, 0.0502, 0.0962, 0.1301, 0.1318, 0.1574, 0.1213],
        [0.2837, 0.0463, 0.0428, 0.0703, 0.1770, 0.1348, 0.1175, 0.1276],
        [0.3393, 0.0393, 0.0340, 0.0719, 0.1322, 0.1223, 0.1298, 0.1312],
        [0.2910, 0.0639, 0.0536, 0.0962, 0.1468, 0.1264, 0.1169, 0.1051],
        [0.3118, 0.0438, 0.0435, 0.0689, 0.1494, 0.1737, 0.1039, 0.1049],
        [0.3903, 0.0397, 0.0357, 0.0739, 0.1260, 0.1084, 0.1072, 0.1188],
        [0.4597, 0.0305, 0.0305, 0.0478, 0.1033, 0.0953, 0.1119, 0.1209],
        [0.4178, 0.0543, 0.0467, 0.0824, 0.1158, 0.0912, 0.0912, 0.1006],
        [0.4404, 0.0396, 0.0385, 0.0623, 0.1385, 0.0869, 0.1008, 0.0931],
        [0.5233, 0.0341, 0.0317, 0.0658, 0.0791, 0.0694, 0.0958, 0.1007],
        [0.6297, 0.0245, 0.0249, 0.0374, 0.0634, 0.0655, 0.0739, 0.0808],
        [0.6174, 0.0222, 0.0220, 0.0295, 0.0691, 0.0710, 0.0708, 0.0980]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.0945, 0.1716, 0.1402, 0.1032, 0.1358, 0.1208, 0.1261, 0.1078],
        [0.1452, 0.1247, 0.1184, 0.1273, 0.1118, 0.1218, 0.1270, 0.1238],
        [0.0934, 0.1526, 0.1386, 0.1239, 0.1076, 0.1519, 0.1339, 0.0982],
        [0.1274, 0.1369, 0.1368, 0.1288, 0.0995, 0.1200, 0.1565, 0.0943],
        [0.1469, 0.0872, 0.0835, 0.1393, 0.1288, 0.1244, 0.1323, 0.1576],
        [0.0929, 0.1564, 0.1307, 0.1417, 0.1126, 0.1353, 0.1232, 0.1073],
        [0.1184, 0.1336, 0.1338, 0.1378, 0.1118, 0.1012, 0.1228, 0.1405],
        [0.1344, 0.0883, 0.0869, 0.1485, 0.1541, 0.1313, 0.1263, 0.1302],
        [0.1524, 0.0762, 0.0846, 0.1375, 0.1219, 0.1200, 0.1503, 0.1571],
        [0.1041, 0.1458, 0.1314, 0.1172, 0.1301, 0.1336, 0.1217, 0.1161],
        [0.1169, 0.1474, 0.1442, 0.1072, 0.1232, 0.1231, 0.1225, 0.1155],
        [0.1570, 0.0812, 0.0852, 0.1693, 0.1295, 0.1166, 0.1144, 0.1469],
        [0.1596, 0.0729, 0.0812, 0.1487, 0.1409, 0.1206, 0.1574, 0.1187],
        [0.1753, 0.0705, 0.0779, 0.1368, 0.1281, 0.1358, 0.1361, 0.1395]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 39 [   0/390]  Loss: 0.2090 (0.209)  Acc@1: 89.0625 (89.0625)  Acc@5: 100.0000 (100.0000)LR: 3.754e-03
Train: 39 [  50/390]  Loss: 0.1172 (0.190)  Acc@1: 96.8750 (93.3211)  Acc@5: 100.0000 (99.9387)LR: 3.754e-03
Train: 39 [ 100/390]  Loss: 0.2118 (0.188)  Acc@1: 95.3125 (93.5953)  Acc@5: 100.0000 (99.9536)LR: 3.754e-03
Train: 39 [ 150/390]  Loss: 0.1140 (0.189)  Acc@1: 98.4375 (93.3671)  Acc@5: 100.0000 (99.9276)LR: 3.754e-03
Train: 39 [ 200/390]  Loss: 0.1825 (0.190)  Acc@1: 92.1875 (93.3924)  Acc@5: 100.0000 (99.9223)LR: 3.754e-03
Train: 39 [ 250/390]  Loss: 0.2302 (0.192)  Acc@1: 92.1875 (93.2582)  Acc@5: 100.0000 (99.9128)LR: 3.754e-03
Train: 39 [ 300/390]  Loss: 0.1996 (0.195)  Acc@1: 92.1875 (93.2049)  Acc@5: 100.0000 (99.9221)LR: 3.754e-03
Train: 39 [ 350/390]  Loss: 0.2676 (0.194)  Acc@1: 90.6250 (93.2336)  Acc@5: 100.0000 (99.9110)LR: 3.754e-03
Train: 39 [ 390/390]  Loss: 0.5211 (0.195)  Acc@1: 85.0000 (93.2720)  Acc@5: 100.0000 (99.9040)LR: 3.754e-03
train_acc 93.272000
Valid: 39 [   0/390]  Loss: 0.3713 (0.371)  Acc@1: 90.6250 (90.6250)  Acc@5: 98.4375 (98.4375)
Valid: 39 [  50/390]  Loss: 0.2275 (0.336)  Acc@1: 93.7500 (89.3689)  Acc@5: 100.0000 (99.6017)
Valid: 39 [ 100/390]  Loss: 0.6397 (0.357)  Acc@1: 82.8125 (88.6757)  Acc@5: 98.4375 (99.6132)
Valid: 39 [ 150/390]  Loss: 0.5393 (0.354)  Acc@1: 81.2500 (88.8452)  Acc@5: 100.0000 (99.5550)
Valid: 39 [ 200/390]  Loss: 0.4304 (0.355)  Acc@1: 87.5000 (88.6738)  Acc@5: 96.8750 (99.5258)
Valid: 39 [ 250/390]  Loss: 0.7070 (0.354)  Acc@1: 75.0000 (88.6828)  Acc@5: 98.4375 (99.5518)
Valid: 39 [ 300/390]  Loss: 0.2742 (0.353)  Acc@1: 90.6250 (88.7043)  Acc@5: 100.0000 (99.5536)
Valid: 39 [ 350/390]  Loss: 0.4058 (0.354)  Acc@1: 85.9375 (88.6262)  Acc@5: 100.0000 (99.5593)
Valid: 39 [ 390/390]  Loss: 0.4578 (0.353)  Acc@1: 80.0000 (88.5640)  Acc@5: 100.0000 (99.5640)
valid_acc 88.564000
epoch = 39   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('skip_connect', 1), ('dil_conv_5x5', 2), ('dil_conv_3x3', 1), ('dil_conv_5x5', 3), ('max_pool_3x3', 0), ('skip_connect', 2), ('dil_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1395, 0.0518, 0.0451, 0.0813, 0.2401, 0.1767, 0.1462, 0.1194],
        [0.2523, 0.0375, 0.0359, 0.0563, 0.1892, 0.1262, 0.1541, 0.1486],
        [0.2513, 0.0644, 0.0496, 0.0955, 0.1307, 0.1313, 0.1565, 0.1207],
        [0.2937, 0.0450, 0.0417, 0.0687, 0.1764, 0.1323, 0.1166, 0.1256],
        [0.3473, 0.0382, 0.0333, 0.0701, 0.1324, 0.1210, 0.1288, 0.1289],
        [0.2959, 0.0632, 0.0532, 0.0962, 0.1465, 0.1251, 0.1161, 0.1038],
        [0.3237, 0.0427, 0.0424, 0.0676, 0.1464, 0.1714, 0.1030, 0.1028],
        [0.4037, 0.0386, 0.0350, 0.0724, 0.1240, 0.1058, 0.1038, 0.1168],
        [0.4750, 0.0298, 0.0298, 0.0467, 0.1000, 0.0919, 0.1090, 0.1178],
        [0.4291, 0.0531, 0.0459, 0.0811, 0.1123, 0.0900, 0.0896, 0.0989],
        [0.4585, 0.0384, 0.0373, 0.0606, 0.1334, 0.0848, 0.0976, 0.0895],
        [0.5390, 0.0331, 0.0310, 0.0638, 0.0762, 0.0670, 0.0926, 0.0973],
        [0.6454, 0.0238, 0.0242, 0.0362, 0.0601, 0.0627, 0.0707, 0.0769],
        [0.6338, 0.0215, 0.0214, 0.0286, 0.0657, 0.0679, 0.0676, 0.0935]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.0937, 0.1705, 0.1404, 0.1030, 0.1371, 0.1218, 0.1266, 0.1069],
        [0.1459, 0.1243, 0.1188, 0.1272, 0.1121, 0.1217, 0.1270, 0.1228],
        [0.0933, 0.1507, 0.1386, 0.1248, 0.1067, 0.1529, 0.1348, 0.0981],
        [0.1274, 0.1364, 0.1374, 0.1290, 0.0990, 0.1203, 0.1568, 0.0938],
        [0.1469, 0.0850, 0.0829, 0.1390, 0.1296, 0.1249, 0.1321, 0.1596],
        [0.0926, 0.1549, 0.1311, 0.1425, 0.1127, 0.1349, 0.1241, 0.1073],
        [0.1181, 0.1332, 0.1346, 0.1394, 0.1108, 0.1014, 0.1226, 0.1399],
        [0.1340, 0.0866, 0.0872, 0.1491, 0.1536, 0.1328, 0.1262, 0.1304],
        [0.1526, 0.0754, 0.0851, 0.1384, 0.1217, 0.1202, 0.1496, 0.1569],
        [0.1046, 0.1440, 0.1312, 0.1192, 0.1309, 0.1318, 0.1220, 0.1164],
        [0.1163, 0.1461, 0.1449, 0.1069, 0.1235, 0.1225, 0.1236, 0.1163],
        [0.1578, 0.0793, 0.0846, 0.1699, 0.1291, 0.1169, 0.1141, 0.1483],
        [0.1603, 0.0714, 0.0808, 0.1486, 0.1425, 0.1218, 0.1563, 0.1184],
        [0.1779, 0.0691, 0.0772, 0.1364, 0.1272, 0.1348, 0.1370, 0.1405]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 40 [   0/390]  Loss: 0.1680 (0.168)  Acc@1: 95.3125 (95.3125)  Acc@5: 100.0000 (100.0000)LR: 3.292e-03
Train: 40 [  50/390]  Loss: 0.2492 (0.182)  Acc@1: 89.0625 (93.5662)  Acc@5: 100.0000 (99.8775)LR: 3.292e-03
Train: 40 [ 100/390]  Loss: 0.2834 (0.178)  Acc@1: 85.9375 (93.5798)  Acc@5: 100.0000 (99.9226)LR: 3.292e-03
Train: 40 [ 150/390]  Loss: 0.2414 (0.181)  Acc@1: 92.1875 (93.5120)  Acc@5: 100.0000 (99.9172)LR: 3.292e-03
Train: 40 [ 200/390]  Loss: 0.3351 (0.186)  Acc@1: 90.6250 (93.4935)  Acc@5: 100.0000 (99.9145)LR: 3.292e-03
Train: 40 [ 250/390]  Loss: 0.08850 (0.188)  Acc@1: 96.8750 (93.4636)  Acc@5: 100.0000 (99.9128)LR: 3.292e-03
Train: 40 [ 300/390]  Loss: 0.2126 (0.191)  Acc@1: 90.6250 (93.3295)  Acc@5: 100.0000 (99.9169)LR: 3.292e-03
Train: 40 [ 350/390]  Loss: 0.1819 (0.190)  Acc@1: 95.3125 (93.4072)  Acc@5: 100.0000 (99.9199)LR: 3.292e-03
Train: 40 [ 390/390]  Loss: 0.3754 (0.191)  Acc@1: 87.5000 (93.3520)  Acc@5: 100.0000 (99.9240)LR: 3.292e-03
train_acc 93.352000
Valid: 40 [   0/390]  Loss: 0.3160 (0.316)  Acc@1: 87.5000 (87.5000)  Acc@5: 100.0000 (100.0000)
Valid: 40 [  50/390]  Loss: 0.3238 (0.356)  Acc@1: 89.0625 (88.5723)  Acc@5: 100.0000 (99.5098)
Valid: 40 [ 100/390]  Loss: 0.2993 (0.348)  Acc@1: 87.5000 (88.8150)  Acc@5: 100.0000 (99.5668)
Valid: 40 [ 150/390]  Loss: 0.3297 (0.352)  Acc@1: 92.1875 (88.7003)  Acc@5: 100.0000 (99.5550)
Valid: 40 [ 200/390]  Loss: 0.6132 (0.352)  Acc@1: 76.5625 (88.6505)  Acc@5: 100.0000 (99.5569)
Valid: 40 [ 250/390]  Loss: 0.3127 (0.356)  Acc@1: 85.9375 (88.4960)  Acc@5: 100.0000 (99.5518)
Valid: 40 [ 300/390]  Loss: 0.4086 (0.355)  Acc@1: 89.0625 (88.5797)  Acc@5: 100.0000 (99.5432)
Valid: 40 [ 350/390]  Loss: 0.4460 (0.354)  Acc@1: 85.9375 (88.5684)  Acc@5: 100.0000 (99.5548)
Valid: 40 [ 390/390]  Loss: 0.3936 (0.353)  Acc@1: 87.5000 (88.5760)  Acc@5: 100.0000 (99.5600)
valid_acc 88.576000
epoch = 40   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('dil_conv_3x3', 1), ('dil_conv_5x5', 2), ('dil_conv_3x3', 1), ('dil_conv_5x5', 3), ('sep_conv_3x3', 2), ('skip_connect', 2), ('dil_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1398, 0.0507, 0.0448, 0.0810, 0.2434, 0.1772, 0.1453, 0.1178],
        [0.2591, 0.0364, 0.0352, 0.0554, 0.1882, 0.1239, 0.1532, 0.1485],
        [0.2546, 0.0631, 0.0494, 0.0956, 0.1308, 0.1310, 0.1564, 0.1192],
        [0.3020, 0.0441, 0.0410, 0.0678, 0.1742, 0.1306, 0.1153, 0.1249],
        [0.3572, 0.0371, 0.0325, 0.0688, 0.1312, 0.1191, 0.1273, 0.1269],
        [0.3020, 0.0625, 0.0531, 0.0967, 0.1453, 0.1235, 0.1155, 0.1016],
        [0.3338, 0.0416, 0.0415, 0.0663, 0.1445, 0.1691, 0.1014, 0.1018],
        [0.4184, 0.0374, 0.0341, 0.0708, 0.1215, 0.1021, 0.1013, 0.1145],
        [0.4895, 0.0292, 0.0294, 0.0462, 0.0965, 0.0886, 0.1063, 0.1145],
        [0.4387, 0.0519, 0.0457, 0.0811, 0.1096, 0.0882, 0.0878, 0.0969],
        [0.4766, 0.0369, 0.0361, 0.0588, 0.1283, 0.0819, 0.0948, 0.0865],
        [0.5561, 0.0319, 0.0301, 0.0618, 0.0725, 0.0647, 0.0894, 0.0935],
        [0.6603, 0.0230, 0.0237, 0.0353, 0.0571, 0.0601, 0.0676, 0.0730],
        [0.6505, 0.0207, 0.0208, 0.0277, 0.0624, 0.0641, 0.0643, 0.0896]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.0934, 0.1704, 0.1419, 0.1028, 0.1369, 0.1219, 0.1272, 0.1055],
        [0.1464, 0.1237, 0.1188, 0.1268, 0.1120, 0.1221, 0.1269, 0.1233],
        [0.0934, 0.1498, 0.1401, 0.1264, 0.1058, 0.1533, 0.1337, 0.0974],
        [0.1256, 0.1372, 0.1388, 0.1280, 0.0983, 0.1203, 0.1572, 0.0947],
        [0.1473, 0.0838, 0.0827, 0.1396, 0.1299, 0.1247, 0.1314, 0.1606],
        [0.0919, 0.1529, 0.1311, 0.1440, 0.1136, 0.1348, 0.1253, 0.1064],
        [0.1184, 0.1325, 0.1349, 0.1395, 0.1100, 0.1011, 0.1225, 0.1411],
        [0.1340, 0.0849, 0.0862, 0.1485, 0.1550, 0.1334, 0.1267, 0.1313],
        [0.1540, 0.0738, 0.0843, 0.1380, 0.1203, 0.1208, 0.1503, 0.1585],
        [0.1045, 0.1428, 0.1313, 0.1203, 0.1309, 0.1320, 0.1219, 0.1162],
        [0.1157, 0.1458, 0.1460, 0.1072, 0.1241, 0.1221, 0.1228, 0.1163],
        [0.1595, 0.0779, 0.0839, 0.1705, 0.1290, 0.1163, 0.1139, 0.1489],
        [0.1611, 0.0700, 0.0800, 0.1488, 0.1441, 0.1218, 0.1564, 0.1178],
        [0.1798, 0.0675, 0.0764, 0.1363, 0.1272, 0.1350, 0.1363, 0.1415]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 41 [   0/390]  Loss: 0.1911 (0.191)  Acc@1: 95.3125 (95.3125)  Acc@5: 100.0000 (100.0000)LR: 2.868e-03
Train: 41 [  50/390]  Loss: 0.1644 (0.192)  Acc@1: 96.8750 (93.2292)  Acc@5: 100.0000 (99.8775)LR: 2.868e-03
Train: 41 [ 100/390]  Loss: 0.3128 (0.189)  Acc@1: 89.0625 (93.3323)  Acc@5: 98.4375 (99.8917)LR: 2.868e-03
Train: 41 [ 150/390]  Loss: 0.2869 (0.192)  Acc@1: 85.9375 (93.0464)  Acc@5: 100.0000 (99.8862)LR: 2.868e-03
Train: 41 [ 200/390]  Loss: 0.05239 (0.187)  Acc@1: 100.0000 (93.2136)  Acc@5: 100.0000 (99.8912)LR: 2.868e-03
Train: 41 [ 250/390]  Loss: 0.2958 (0.190)  Acc@1: 92.1875 (93.1462)  Acc@5: 100.0000 (99.8942)LR: 2.868e-03
Train: 41 [ 300/390]  Loss: 0.2939 (0.190)  Acc@1: 87.5000 (93.1686)  Acc@5: 100.0000 (99.9014)LR: 2.868e-03
Train: 41 [ 350/390]  Loss: 0.1136 (0.191)  Acc@1: 93.7500 (93.1713)  Acc@5: 100.0000 (99.9021)LR: 2.868e-03
Train: 41 [ 390/390]  Loss: 0.2352 (0.190)  Acc@1: 90.0000 (93.2400)  Acc@5: 100.0000 (99.9120)LR: 2.868e-03
train_acc 93.240000
Valid: 41 [   0/390]  Loss: 0.2076 (0.208)  Acc@1: 90.6250 (90.6250)  Acc@5: 100.0000 (100.0000)
Valid: 41 [  50/390]  Loss: 0.2693 (0.380)  Acc@1: 87.5000 (87.4387)  Acc@5: 100.0000 (99.4179)
Valid: 41 [ 100/390]  Loss: 0.3509 (0.366)  Acc@1: 84.3750 (87.9332)  Acc@5: 100.0000 (99.4121)
Valid: 41 [ 150/390]  Loss: 0.5917 (0.368)  Acc@1: 82.8125 (88.1002)  Acc@5: 98.4375 (99.4516)
Valid: 41 [ 200/390]  Loss: 0.5240 (0.380)  Acc@1: 84.3750 (87.7799)  Acc@5: 98.4375 (99.4636)
Valid: 41 [ 250/390]  Loss: 0.4021 (0.374)  Acc@1: 85.9375 (87.9358)  Acc@5: 98.4375 (99.5082)
Valid: 41 [ 300/390]  Loss: 0.3782 (0.374)  Acc@1: 87.5000 (87.9205)  Acc@5: 100.0000 (99.5120)
Valid: 41 [ 350/390]  Loss: 0.4578 (0.375)  Acc@1: 87.5000 (87.9452)  Acc@5: 100.0000 (99.5192)
Valid: 41 [ 390/390]  Loss: 0.3523 (0.374)  Acc@1: 90.0000 (87.9280)  Acc@5: 100.0000 (99.5120)
valid_acc 87.928000
epoch = 41   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('dil_conv_3x3', 1), ('dil_conv_5x5', 2), ('dil_conv_3x3', 1), ('dil_conv_5x5', 3), ('sep_conv_3x3', 2), ('skip_connect', 2), ('dil_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1410, 0.0492, 0.0440, 0.0800, 0.2473, 0.1765, 0.1449, 0.1172],
        [0.2631, 0.0357, 0.0345, 0.0543, 0.1893, 0.1222, 0.1528, 0.1483],
        [0.2592, 0.0615, 0.0485, 0.0944, 0.1320, 0.1304, 0.1554, 0.1187],
        [0.3107, 0.0431, 0.0400, 0.0662, 0.1724, 0.1292, 0.1149, 0.1235],
        [0.3645, 0.0364, 0.0322, 0.0676, 0.1309, 0.1179, 0.1252, 0.1254],
        [0.3120, 0.0609, 0.0521, 0.0956, 0.1446, 0.1208, 0.1141, 0.1000],
        [0.3449, 0.0407, 0.0404, 0.0648, 0.1434, 0.1660, 0.0996, 0.1003],
        [0.4309, 0.0365, 0.0335, 0.0693, 0.1199, 0.0994, 0.0990, 0.1115],
        [0.5009, 0.0286, 0.0289, 0.0455, 0.0935, 0.0864, 0.1037, 0.1125],
        [0.4538, 0.0501, 0.0445, 0.0795, 0.1062, 0.0860, 0.0851, 0.0949],
        [0.4918, 0.0361, 0.0352, 0.0573, 0.1241, 0.0800, 0.0921, 0.0834],
        [0.5680, 0.0311, 0.0295, 0.0600, 0.0710, 0.0625, 0.0868, 0.0911],
        [0.6717, 0.0226, 0.0233, 0.0346, 0.0548, 0.0577, 0.0651, 0.0703],
        [0.6646, 0.0202, 0.0203, 0.0271, 0.0594, 0.0613, 0.0615, 0.0855]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.0929, 0.1699, 0.1423, 0.1020, 0.1369, 0.1221, 0.1286, 0.1054],
        [0.1471, 0.1237, 0.1194, 0.1266, 0.1113, 0.1216, 0.1267, 0.1236],
        [0.0937, 0.1483, 0.1401, 0.1279, 0.1050, 0.1545, 0.1333, 0.0971],
        [0.1260, 0.1367, 0.1396, 0.1290, 0.0979, 0.1201, 0.1574, 0.0933],
        [0.1472, 0.0822, 0.0820, 0.1391, 0.1307, 0.1247, 0.1311, 0.1631],
        [0.0919, 0.1512, 0.1305, 0.1437, 0.1143, 0.1355, 0.1263, 0.1066],
        [0.1175, 0.1317, 0.1349, 0.1407, 0.1108, 0.1012, 0.1221, 0.1410],
        [0.1323, 0.0835, 0.0855, 0.1479, 0.1566, 0.1345, 0.1271, 0.1326],
        [0.1556, 0.0731, 0.0837, 0.1374, 0.1190, 0.1213, 0.1501, 0.1599],
        [0.1052, 0.1407, 0.1306, 0.1213, 0.1301, 0.1326, 0.1224, 0.1170],
        [0.1156, 0.1452, 0.1466, 0.1075, 0.1254, 0.1208, 0.1221, 0.1168],
        [0.1600, 0.0760, 0.0825, 0.1697, 0.1292, 0.1170, 0.1149, 0.1506],
        [0.1624, 0.0688, 0.0791, 0.1479, 0.1457, 0.1222, 0.1559, 0.1181],
        [0.1807, 0.0665, 0.0756, 0.1355, 0.1270, 0.1351, 0.1366, 0.1429]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 42 [   0/390]  Loss: 0.2162 (0.216)  Acc@1: 93.7500 (93.7500)  Acc@5: 100.0000 (100.0000)LR: 2.484e-03
Train: 42 [  50/390]  Loss: 0.1772 (0.188)  Acc@1: 96.8750 (93.2904)  Acc@5: 100.0000 (99.9081)LR: 2.484e-03
Train: 42 [ 100/390]  Loss: 0.3325 (0.189)  Acc@1: 87.5000 (93.3014)  Acc@5: 100.0000 (99.8917)LR: 2.484e-03
Train: 42 [ 150/390]  Loss: 0.08428 (0.187)  Acc@1: 100.0000 (93.4396)  Acc@5: 100.0000 (99.8862)LR: 2.484e-03
Train: 42 [ 200/390]  Loss: 0.4393 (0.190)  Acc@1: 81.2500 (93.3535)  Acc@5: 100.0000 (99.8834)LR: 2.484e-03
Train: 42 [ 250/390]  Loss: 0.3512 (0.194)  Acc@1: 90.6250 (93.2209)  Acc@5: 100.0000 (99.8693)LR: 2.484e-03
Train: 42 [ 300/390]  Loss: 0.1128 (0.192)  Acc@1: 96.8750 (93.3399)  Acc@5: 100.0000 (99.8806)LR: 2.484e-03
Train: 42 [ 350/390]  Loss: 0.1884 (0.190)  Acc@1: 92.1875 (93.4161)  Acc@5: 100.0000 (99.8976)LR: 2.484e-03
Train: 42 [ 390/390]  Loss: 0.08996 (0.190)  Acc@1: 97.5000 (93.3800)  Acc@5: 100.0000 (99.9040)LR: 2.484e-03
train_acc 93.380000
Valid: 42 [   0/390]  Loss: 0.4080 (0.408)  Acc@1: 82.8125 (82.8125)  Acc@5: 98.4375 (98.4375)
Valid: 42 [  50/390]  Loss: 0.2859 (0.374)  Acc@1: 87.5000 (88.4498)  Acc@5: 100.0000 (99.4485)
Valid: 42 [ 100/390]  Loss: 0.1705 (0.356)  Acc@1: 96.8750 (88.8304)  Acc@5: 98.4375 (99.4276)
Valid: 42 [ 150/390]  Loss: 0.1810 (0.357)  Acc@1: 93.7500 (88.5141)  Acc@5: 100.0000 (99.4723)
Valid: 42 [ 200/390]  Loss: 0.3173 (0.364)  Acc@1: 90.6250 (88.2463)  Acc@5: 98.4375 (99.4558)
Valid: 42 [ 250/390]  Loss: 0.4288 (0.367)  Acc@1: 85.9375 (88.1412)  Acc@5: 100.0000 (99.4771)
Valid: 42 [ 300/390]  Loss: 0.1901 (0.363)  Acc@1: 89.0625 (88.1074)  Acc@5: 100.0000 (99.4809)
Valid: 42 [ 350/390]  Loss: 0.5054 (0.370)  Acc@1: 81.2500 (87.8917)  Acc@5: 100.0000 (99.4569)
Valid: 42 [ 390/390]  Loss: 0.2997 (0.368)  Acc@1: 90.0000 (87.8920)  Acc@5: 100.0000 (99.5040)
valid_acc 87.892000
epoch = 42   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('dil_conv_3x3', 1), ('dil_conv_5x5', 2), ('dil_conv_3x3', 1), ('dil_conv_5x5', 3), ('sep_conv_3x3', 2), ('skip_connect', 2), ('dil_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1422, 0.0482, 0.0435, 0.0797, 0.2495, 0.1761, 0.1445, 0.1164],
        [0.2675, 0.0348, 0.0338, 0.0532, 0.1905, 0.1205, 0.1523, 0.1474],
        [0.2642, 0.0603, 0.0481, 0.0941, 0.1319, 0.1292, 0.1548, 0.1174],
        [0.3210, 0.0423, 0.0394, 0.0652, 0.1711, 0.1271, 0.1126, 0.1213],
        [0.3710, 0.0356, 0.0317, 0.0662, 0.1304, 0.1177, 0.1237, 0.1237],
        [0.3197, 0.0599, 0.0515, 0.0953, 0.1446, 0.1170, 0.1133, 0.0987],
        [0.3558, 0.0399, 0.0396, 0.0636, 0.1405, 0.1635, 0.0991, 0.0981],
        [0.4421, 0.0356, 0.0329, 0.0676, 0.1186, 0.0968, 0.0965, 0.1098],
        [0.5152, 0.0280, 0.0285, 0.0448, 0.0902, 0.0841, 0.1001, 0.1091],
        [0.4683, 0.0485, 0.0435, 0.0783, 0.1024, 0.0838, 0.0828, 0.0925],
        [0.5095, 0.0349, 0.0341, 0.0553, 0.1195, 0.0775, 0.0889, 0.0802],
        [0.5810, 0.0303, 0.0289, 0.0581, 0.0691, 0.0605, 0.0837, 0.0885],
        [0.6829, 0.0219, 0.0228, 0.0336, 0.0526, 0.0557, 0.0621, 0.0685],
        [0.6787, 0.0196, 0.0198, 0.0264, 0.0566, 0.0586, 0.0584, 0.0818]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.0919, 0.1711, 0.1430, 0.1010, 0.1371, 0.1219, 0.1287, 0.1053],
        [0.1487, 0.1223, 0.1192, 0.1261, 0.1111, 0.1218, 0.1272, 0.1236],
        [0.0931, 0.1493, 0.1421, 0.1281, 0.1045, 0.1548, 0.1323, 0.0957],
        [0.1264, 0.1351, 0.1389, 0.1291, 0.0973, 0.1206, 0.1586, 0.0940],
        [0.1461, 0.0814, 0.0824, 0.1392, 0.1310, 0.1247, 0.1323, 0.1631],
        [0.0915, 0.1509, 0.1307, 0.1441, 0.1147, 0.1353, 0.1267, 0.1060],
        [0.1189, 0.1299, 0.1342, 0.1410, 0.1103, 0.1010, 0.1225, 0.1421],
        [0.1315, 0.0825, 0.0855, 0.1480, 0.1574, 0.1345, 0.1276, 0.1331],
        [0.1556, 0.0725, 0.0838, 0.1380, 0.1187, 0.1218, 0.1504, 0.1592],
        [0.1049, 0.1403, 0.1311, 0.1223, 0.1304, 0.1326, 0.1218, 0.1166],
        [0.1158, 0.1427, 0.1459, 0.1085, 0.1254, 0.1207, 0.1233, 0.1175],
        [0.1606, 0.0748, 0.0825, 0.1703, 0.1298, 0.1183, 0.1141, 0.1497],
        [0.1632, 0.0679, 0.0792, 0.1487, 0.1458, 0.1233, 0.1550, 0.1169],
        [0.1819, 0.0653, 0.0753, 0.1354, 0.1269, 0.1351, 0.1377, 0.1424]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 43 [   0/390]  Loss: 0.2344 (0.234)  Acc@1: 90.6250 (90.6250)  Acc@5: 100.0000 (100.0000)LR: 2.142e-03
Train: 43 [  50/390]  Loss: 0.2411 (0.183)  Acc@1: 90.6250 (93.3824)  Acc@5: 100.0000 (99.8775)LR: 2.142e-03
Train: 43 [ 100/390]  Loss: 0.2403 (0.191)  Acc@1: 95.3125 (93.0538)  Acc@5: 100.0000 (99.8917)LR: 2.142e-03
Train: 43 [ 150/390]  Loss: 0.2286 (0.187)  Acc@1: 95.3125 (93.3361)  Acc@5: 100.0000 (99.8758)LR: 2.142e-03
Train: 43 [ 200/390]  Loss: 0.1791 (0.190)  Acc@1: 93.7500 (93.3302)  Acc@5: 100.0000 (99.8290)LR: 2.142e-03
Train: 43 [ 250/390]  Loss: 0.1589 (0.189)  Acc@1: 96.8750 (93.3640)  Acc@5: 100.0000 (99.8257)LR: 2.142e-03
Train: 43 [ 300/390]  Loss: 0.1481 (0.190)  Acc@1: 96.8750 (93.3866)  Acc@5: 100.0000 (99.8443)LR: 2.142e-03
Train: 43 [ 350/390]  Loss: 0.2762 (0.187)  Acc@1: 93.7500 (93.4963)  Acc@5: 100.0000 (99.8665)LR: 2.142e-03
Train: 43 [ 390/390]  Loss: 0.09321 (0.187)  Acc@1: 97.5000 (93.4240)  Acc@5: 100.0000 (99.8760)LR: 2.142e-03
train_acc 93.424000
Valid: 43 [   0/390]  Loss: 0.2711 (0.271)  Acc@1: 90.6250 (90.6250)  Acc@5: 100.0000 (100.0000)
Valid: 43 [  50/390]  Loss: 0.2841 (0.350)  Acc@1: 92.1875 (89.0319)  Acc@5: 100.0000 (99.5711)
Valid: 43 [ 100/390]  Loss: 0.2867 (0.364)  Acc@1: 90.6250 (88.3818)  Acc@5: 100.0000 (99.5823)
Valid: 43 [ 150/390]  Loss: 0.3990 (0.354)  Acc@1: 87.5000 (88.7314)  Acc@5: 100.0000 (99.5757)
Valid: 43 [ 200/390]  Loss: 0.3155 (0.367)  Acc@1: 87.5000 (88.1841)  Acc@5: 98.4375 (99.4714)
Valid: 43 [ 250/390]  Loss: 0.5204 (0.367)  Acc@1: 87.5000 (88.1536)  Acc@5: 100.0000 (99.4958)
Valid: 43 [ 300/390]  Loss: 0.4389 (0.371)  Acc@1: 87.5000 (87.9672)  Acc@5: 100.0000 (99.5069)
Valid: 43 [ 350/390]  Loss: 0.5451 (0.370)  Acc@1: 87.5000 (87.9630)  Acc@5: 96.8750 (99.5192)
Valid: 43 [ 390/390]  Loss: 0.2742 (0.369)  Acc@1: 90.0000 (87.9760)  Acc@5: 100.0000 (99.5200)
valid_acc 87.976000
epoch = 43   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('skip_connect', 1), ('dil_conv_5x5', 2), ('dil_conv_3x3', 1), ('dil_conv_5x5', 3), ('sep_conv_3x3', 2), ('skip_connect', 2), ('dil_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1429, 0.0472, 0.0429, 0.0788, 0.2542, 0.1750, 0.1428, 0.1161],
        [0.2717, 0.0342, 0.0333, 0.0523, 0.1892, 0.1204, 0.1523, 0.1465],
        [0.2705, 0.0597, 0.0480, 0.0942, 0.1321, 0.1265, 0.1538, 0.1152],
        [0.3253, 0.0418, 0.0389, 0.0645, 0.1712, 0.1265, 0.1117, 0.1201],
        [0.3793, 0.0353, 0.0315, 0.0654, 0.1300, 0.1149, 0.1219, 0.1217],
        [0.3285, 0.0591, 0.0509, 0.0947, 0.1426, 0.1151, 0.1123, 0.0969],
        [0.3664, 0.0395, 0.0390, 0.0628, 0.1382, 0.1608, 0.0973, 0.0961],
        [0.4555, 0.0350, 0.0324, 0.0667, 0.1163, 0.0939, 0.0940, 0.1062],
        [0.5313, 0.0275, 0.0280, 0.0437, 0.0864, 0.0807, 0.0969, 0.1055],
        [0.4828, 0.0473, 0.0425, 0.0766, 0.1000, 0.0806, 0.0802, 0.0901],
        [0.5226, 0.0342, 0.0335, 0.0541, 0.1164, 0.0757, 0.0860, 0.0774],
        [0.5947, 0.0296, 0.0282, 0.0561, 0.0671, 0.0583, 0.0813, 0.0848],
        [0.6953, 0.0214, 0.0222, 0.0325, 0.0502, 0.0537, 0.0595, 0.0653],
        [0.6929, 0.0190, 0.0193, 0.0257, 0.0541, 0.0556, 0.0554, 0.0779]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.0905, 0.1726, 0.1446, 0.1002, 0.1370, 0.1218, 0.1286, 0.1045],
        [0.1509, 0.1214, 0.1191, 0.1265, 0.1109, 0.1221, 0.1262, 0.1229],
        [0.0924, 0.1491, 0.1428, 0.1279, 0.1041, 0.1550, 0.1334, 0.0954],
        [0.1270, 0.1339, 0.1387, 0.1295, 0.0968, 0.1211, 0.1585, 0.0945],
        [0.1464, 0.0805, 0.0825, 0.1394, 0.1313, 0.1252, 0.1312, 0.1635],
        [0.0910, 0.1518, 0.1317, 0.1441, 0.1149, 0.1352, 0.1266, 0.1047],
        [0.1196, 0.1280, 0.1331, 0.1425, 0.1099, 0.1014, 0.1225, 0.1430],
        [0.1316, 0.0817, 0.0857, 0.1486, 0.1565, 0.1350, 0.1270, 0.1340],
        [0.1546, 0.0717, 0.0839, 0.1380, 0.1185, 0.1210, 0.1519, 0.1603],
        [0.1043, 0.1402, 0.1308, 0.1235, 0.1302, 0.1333, 0.1214, 0.1163],
        [0.1168, 0.1409, 0.1451, 0.1095, 0.1249, 0.1207, 0.1247, 0.1174],
        [0.1625, 0.0736, 0.0816, 0.1698, 0.1298, 0.1183, 0.1141, 0.1503],
        [0.1636, 0.0668, 0.0785, 0.1477, 0.1472, 0.1246, 0.1545, 0.1171],
        [0.1831, 0.0644, 0.0747, 0.1344, 0.1270, 0.1344, 0.1378, 0.1443]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 44 [   0/390]  Loss: 0.1126 (0.113)  Acc@1: 100.0000 (100.0000)  Acc@5: 100.0000 (100.0000)LR: 1.843e-03
Train: 44 [  50/390]  Loss: 0.1402 (0.171)  Acc@1: 93.7500 (94.2096)  Acc@5: 100.0000 (99.9387)LR: 1.843e-03
Train: 44 [ 100/390]  Loss: 0.1565 (0.189)  Acc@1: 95.3125 (93.4715)  Acc@5: 100.0000 (99.8917)LR: 1.843e-03
Train: 44 [ 150/390]  Loss: 0.08976 (0.185)  Acc@1: 96.8750 (93.5327)  Acc@5: 100.0000 (99.9069)LR: 1.843e-03
Train: 44 [ 200/390]  Loss: 0.2355 (0.192)  Acc@1: 90.6250 (93.2214)  Acc@5: 100.0000 (99.8834)LR: 1.843e-03
Train: 44 [ 250/390]  Loss: 0.2584 (0.187)  Acc@1: 87.5000 (93.4512)  Acc@5: 100.0000 (99.8942)LR: 1.843e-03
Train: 44 [ 300/390]  Loss: 0.06886 (0.189)  Acc@1: 100.0000 (93.4489)  Acc@5: 100.0000 (99.8858)LR: 1.843e-03
Train: 44 [ 350/390]  Loss: 0.1710 (0.188)  Acc@1: 93.7500 (93.5141)  Acc@5: 100.0000 (99.8976)LR: 1.843e-03
Train: 44 [ 390/390]  Loss: 0.1454 (0.188)  Acc@1: 95.0000 (93.4680)  Acc@5: 100.0000 (99.9040)LR: 1.843e-03
train_acc 93.468000
Valid: 44 [   0/390]  Loss: 0.3039 (0.304)  Acc@1: 92.1875 (92.1875)  Acc@5: 100.0000 (100.0000)
Valid: 44 [  50/390]  Loss: 0.4649 (0.353)  Acc@1: 85.9375 (88.5723)  Acc@5: 96.8750 (99.2647)
Valid: 44 [ 100/390]  Loss: 0.4717 (0.348)  Acc@1: 79.6875 (88.3663)  Acc@5: 98.4375 (99.4895)
Valid: 44 [ 150/390]  Loss: 0.1794 (0.354)  Acc@1: 92.1875 (88.2968)  Acc@5: 100.0000 (99.4619)
Valid: 44 [ 200/390]  Loss: 0.4804 (0.362)  Acc@1: 81.2500 (88.1919)  Acc@5: 100.0000 (99.4403)
Valid: 44 [ 250/390]  Loss: 0.5339 (0.360)  Acc@1: 82.8125 (88.2346)  Acc@5: 100.0000 (99.4460)
Valid: 44 [ 300/390]  Loss: 0.5014 (0.361)  Acc@1: 85.9375 (88.2319)  Acc@5: 96.8750 (99.4290)
Valid: 44 [ 350/390]  Loss: 0.4732 (0.361)  Acc@1: 85.9375 (88.1366)  Acc@5: 100.0000 (99.4569)
Valid: 44 [ 390/390]  Loss: 0.3224 (0.362)  Acc@1: 87.5000 (88.0720)  Acc@5: 97.5000 (99.4680)
valid_acc 88.072000
epoch = 44   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('dil_conv_3x3', 1), ('dil_conv_5x5', 2), ('dil_conv_3x3', 1), ('dil_conv_5x5', 3), ('sep_conv_3x3', 2), ('skip_connect', 2), ('dil_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1434, 0.0463, 0.0424, 0.0780, 0.2561, 0.1746, 0.1435, 0.1156],
        [0.2773, 0.0332, 0.0326, 0.0512, 0.1882, 0.1190, 0.1526, 0.1459],
        [0.2763, 0.0586, 0.0476, 0.0936, 0.1318, 0.1258, 0.1523, 0.1140],
        [0.3328, 0.0409, 0.0384, 0.0634, 0.1705, 0.1245, 0.1109, 0.1186],
        [0.3870, 0.0348, 0.0314, 0.0649, 0.1284, 0.1136, 0.1201, 0.1198],
        [0.3364, 0.0580, 0.0502, 0.0936, 0.1420, 0.1127, 0.1116, 0.0955],
        [0.3791, 0.0386, 0.0386, 0.0621, 0.1354, 0.1562, 0.0956, 0.0944],
        [0.4678, 0.0343, 0.0321, 0.0657, 0.1144, 0.0913, 0.0909, 0.1035],
        [0.5448, 0.0270, 0.0276, 0.0432, 0.0833, 0.0777, 0.0939, 0.1024],
        [0.4947, 0.0459, 0.0417, 0.0753, 0.0975, 0.0787, 0.0782, 0.0880],
        [0.5386, 0.0332, 0.0328, 0.0528, 0.1120, 0.0740, 0.0821, 0.0744],
        [0.6050, 0.0290, 0.0279, 0.0550, 0.0656, 0.0564, 0.0788, 0.0824],
        [0.7060, 0.0208, 0.0217, 0.0317, 0.0480, 0.0520, 0.0574, 0.0625],
        [0.7046, 0.0186, 0.0189, 0.0254, 0.0523, 0.0530, 0.0530, 0.0742]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.0905, 0.1716, 0.1450, 0.1001, 0.1365, 0.1218, 0.1306, 0.1039],
        [0.1513, 0.1203, 0.1186, 0.1256, 0.1108, 0.1238, 0.1273, 0.1223],
        [0.0922, 0.1465, 0.1428, 0.1291, 0.1040, 0.1568, 0.1333, 0.0952],
        [0.1271, 0.1319, 0.1379, 0.1310, 0.0969, 0.1216, 0.1601, 0.0936],
        [0.1460, 0.0795, 0.0826, 0.1391, 0.1319, 0.1259, 0.1312, 0.1638],
        [0.0914, 0.1500, 0.1314, 0.1447, 0.1154, 0.1349, 0.1266, 0.1055],
        [0.1205, 0.1261, 0.1321, 0.1440, 0.1107, 0.1012, 0.1225, 0.1429],
        [0.1304, 0.0809, 0.0855, 0.1480, 0.1577, 0.1358, 0.1274, 0.1343],
        [0.1530, 0.0710, 0.0840, 0.1378, 0.1190, 0.1219, 0.1519, 0.1615],
        [0.1052, 0.1373, 0.1298, 0.1239, 0.1319, 0.1338, 0.1214, 0.1168],
        [0.1175, 0.1381, 0.1438, 0.1093, 0.1257, 0.1215, 0.1257, 0.1184],
        [0.1626, 0.0720, 0.0806, 0.1681, 0.1305, 0.1194, 0.1146, 0.1522],
        [0.1641, 0.0656, 0.0778, 0.1461, 0.1489, 0.1250, 0.1555, 0.1170],
        [0.1844, 0.0629, 0.0739, 0.1331, 0.1278, 0.1349, 0.1376, 0.1454]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 45 [   0/390]  Loss: 0.3966 (0.397)  Acc@1: 89.0625 (89.0625)  Acc@5: 100.0000 (100.0000)LR: 1.587e-03
Train: 45 [  50/390]  Loss: 0.05706 (0.184)  Acc@1: 98.4375 (93.7500)  Acc@5: 100.0000 (99.8468)LR: 1.587e-03
Train: 45 [ 100/390]  Loss: 0.1047 (0.183)  Acc@1: 100.0000 (93.7964)  Acc@5: 100.0000 (99.8762)LR: 1.587e-03
Train: 45 [ 150/390]  Loss: 0.1809 (0.183)  Acc@1: 95.3125 (93.7810)  Acc@5: 98.4375 (99.9069)LR: 1.587e-03
Train: 45 [ 200/390]  Loss: 0.1432 (0.181)  Acc@1: 95.3125 (93.8977)  Acc@5: 100.0000 (99.9067)LR: 1.587e-03
Train: 45 [ 250/390]  Loss: 0.3756 (0.184)  Acc@1: 85.9375 (93.6504)  Acc@5: 100.0000 (99.9128)LR: 1.587e-03
Train: 45 [ 300/390]  Loss: 0.3587 (0.184)  Acc@1: 84.3750 (93.7604)  Acc@5: 100.0000 (99.9118)LR: 1.587e-03
Train: 45 [ 350/390]  Loss: 0.2434 (0.184)  Acc@1: 89.0625 (93.6788)  Acc@5: 100.0000 (99.9199)LR: 1.587e-03
Train: 45 [ 390/390]  Loss: 0.3214 (0.186)  Acc@1: 90.0000 (93.6320)  Acc@5: 100.0000 (99.9200)LR: 1.587e-03
train_acc 93.632000
Valid: 45 [   0/390]  Loss: 0.4659 (0.466)  Acc@1: 81.2500 (81.2500)  Acc@5: 100.0000 (100.0000)
Valid: 45 [  50/390]  Loss: 0.3691 (0.444)  Acc@1: 87.5000 (85.1103)  Acc@5: 100.0000 (99.2341)
Valid: 45 [ 100/390]  Loss: 0.4304 (0.434)  Acc@1: 85.9375 (85.7673)  Acc@5: 96.8750 (99.3193)
Valid: 45 [ 150/390]  Loss: 0.5241 (0.437)  Acc@1: 79.6875 (85.5857)  Acc@5: 98.4375 (99.3377)
Valid: 45 [ 200/390]  Loss: 0.3366 (0.432)  Acc@1: 85.9375 (85.7276)  Acc@5: 100.0000 (99.3626)
Valid: 45 [ 250/390]  Loss: 0.3973 (0.431)  Acc@1: 87.5000 (85.8254)  Acc@5: 100.0000 (99.3526)
Valid: 45 [ 300/390]  Loss: 0.3519 (0.431)  Acc@1: 90.6250 (85.8596)  Acc@5: 100.0000 (99.3252)
Valid: 45 [ 350/390]  Loss: 0.2694 (0.433)  Acc@1: 89.0625 (85.7817)  Acc@5: 100.0000 (99.3456)
Valid: 45 [ 390/390]  Loss: 0.3463 (0.434)  Acc@1: 85.0000 (85.8000)  Acc@5: 100.0000 (99.3160)
valid_acc 85.800000
epoch = 45   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('dil_conv_3x3', 1), ('dil_conv_5x5', 2), ('dil_conv_3x3', 1), ('dil_conv_5x5', 3), ('sep_conv_3x3', 2), ('skip_connect', 2), ('dil_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1441, 0.0451, 0.0419, 0.0773, 0.2593, 0.1741, 0.1428, 0.1153],
        [0.2819, 0.0325, 0.0321, 0.0503, 0.1889, 0.1176, 0.1522, 0.1445],
        [0.2823, 0.0575, 0.0473, 0.0927, 0.1315, 0.1246, 0.1515, 0.1126],
        [0.3409, 0.0400, 0.0376, 0.0620, 0.1695, 0.1223, 0.1095, 0.1182],
        [0.3946, 0.0342, 0.0311, 0.0640, 0.1272, 0.1120, 0.1186, 0.1182],
        [0.3446, 0.0571, 0.0499, 0.0935, 0.1408, 0.1100, 0.1102, 0.0939],
        [0.3895, 0.0378, 0.0379, 0.0610, 0.1335, 0.1536, 0.0942, 0.0925],
        [0.4783, 0.0336, 0.0317, 0.0647, 0.1127, 0.0888, 0.0885, 0.1018],
        [0.5556, 0.0264, 0.0273, 0.0425, 0.0803, 0.0764, 0.0919, 0.0995],
        [0.5075, 0.0447, 0.0409, 0.0739, 0.0950, 0.0767, 0.0759, 0.0855],
        [0.5544, 0.0322, 0.0319, 0.0511, 0.1078, 0.0721, 0.0791, 0.0714],
        [0.6172, 0.0283, 0.0273, 0.0534, 0.0630, 0.0542, 0.0760, 0.0805],
        [0.7156, 0.0204, 0.0214, 0.0310, 0.0461, 0.0506, 0.0551, 0.0599],
        [0.7139, 0.0182, 0.0187, 0.0250, 0.0506, 0.0510, 0.0509, 0.0717]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.0905, 0.1718, 0.1463, 0.0988, 0.1362, 0.1217, 0.1308, 0.1039],
        [0.1516, 0.1201, 0.1188, 0.1254, 0.1112, 0.1233, 0.1274, 0.1223],
        [0.0921, 0.1456, 0.1437, 0.1287, 0.1030, 0.1574, 0.1345, 0.0950],
        [0.1264, 0.1315, 0.1383, 0.1316, 0.0964, 0.1227, 0.1598, 0.0933],
        [0.1459, 0.0786, 0.0828, 0.1388, 0.1326, 0.1254, 0.1322, 0.1636],
        [0.0913, 0.1498, 0.1325, 0.1451, 0.1150, 0.1345, 0.1269, 0.1050],
        [0.1208, 0.1251, 0.1316, 0.1447, 0.1113, 0.1000, 0.1236, 0.1430],
        [0.1310, 0.0794, 0.0854, 0.1478, 0.1586, 0.1353, 0.1277, 0.1348],
        [0.1529, 0.0704, 0.0843, 0.1380, 0.1180, 0.1217, 0.1529, 0.1619],
        [0.1048, 0.1355, 0.1301, 0.1241, 0.1332, 0.1345, 0.1206, 0.1170],
        [0.1174, 0.1364, 0.1435, 0.1096, 0.1269, 0.1213, 0.1267, 0.1182],
        [0.1640, 0.0707, 0.0806, 0.1681, 0.1308, 0.1196, 0.1139, 0.1523],
        [0.1647, 0.0650, 0.0780, 0.1469, 0.1507, 0.1257, 0.1528, 0.1160],
        [0.1854, 0.0617, 0.0737, 0.1322, 0.1285, 0.1353, 0.1382, 0.1451]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 46 [   0/390]  Loss: 0.1216 (0.122)  Acc@1: 95.3125 (95.3125)  Acc@5: 100.0000 (100.0000)LR: 1.377e-03
Train: 46 [  50/390]  Loss: 0.1362 (0.179)  Acc@1: 96.8750 (94.0564)  Acc@5: 100.0000 (99.9081)LR: 1.377e-03
Train: 46 [ 100/390]  Loss: 0.3517 (0.180)  Acc@1: 90.6250 (93.8428)  Acc@5: 100.0000 (99.9072)LR: 1.377e-03
Train: 46 [ 150/390]  Loss: 0.1787 (0.177)  Acc@1: 96.8750 (93.9466)  Acc@5: 100.0000 (99.9172)LR: 1.377e-03
Train: 46 [ 200/390]  Loss: 0.4358 (0.183)  Acc@1: 82.8125 (93.7267)  Acc@5: 100.0000 (99.8989)LR: 1.377e-03
Train: 46 [ 250/390]  Loss: 0.1764 (0.181)  Acc@1: 93.7500 (93.7811)  Acc@5: 100.0000 (99.9066)LR: 1.377e-03
Train: 46 [ 300/390]  Loss: 0.1681 (0.185)  Acc@1: 92.1875 (93.6150)  Acc@5: 100.0000 (99.9118)LR: 1.377e-03
Train: 46 [ 350/390]  Loss: 0.1494 (0.183)  Acc@1: 93.7500 (93.6432)  Acc@5: 100.0000 (99.9199)LR: 1.377e-03
Train: 46 [ 390/390]  Loss: 0.3469 (0.184)  Acc@1: 92.5000 (93.5840)  Acc@5: 100.0000 (99.9120)LR: 1.377e-03
train_acc 93.584000
Valid: 46 [   0/390]  Loss: 0.3147 (0.315)  Acc@1: 87.5000 (87.5000)  Acc@5: 100.0000 (100.0000)
Valid: 46 [  50/390]  Loss: 0.4214 (0.393)  Acc@1: 84.3750 (86.7341)  Acc@5: 98.4375 (99.5404)
Valid: 46 [ 100/390]  Loss: 0.4520 (0.384)  Acc@1: 89.0625 (87.3608)  Acc@5: 98.4375 (99.5514)
Valid: 46 [ 150/390]  Loss: 0.5545 (0.381)  Acc@1: 84.3750 (87.5207)  Acc@5: 100.0000 (99.4826)
Valid: 46 [ 200/390]  Loss: 0.3212 (0.379)  Acc@1: 85.9375 (87.6322)  Acc@5: 100.0000 (99.4947)
Valid: 46 [ 250/390]  Loss: 0.6365 (0.378)  Acc@1: 82.8125 (87.7303)  Acc@5: 98.4375 (99.5144)
Valid: 46 [ 300/390]  Loss: 0.3700 (0.379)  Acc@1: 84.3750 (87.6038)  Acc@5: 100.0000 (99.5120)
Valid: 46 [ 350/390]  Loss: 0.1590 (0.379)  Acc@1: 93.7500 (87.5089)  Acc@5: 98.4375 (99.5148)
Valid: 46 [ 390/390]  Loss: 0.2028 (0.377)  Acc@1: 92.5000 (87.5600)  Acc@5: 100.0000 (99.5080)
valid_acc 87.560000
epoch = 46   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('dil_conv_3x3', 1), ('dil_conv_5x5', 2), ('dil_conv_3x3', 1), ('dil_conv_5x5', 3), ('sep_conv_3x3', 2), ('skip_connect', 2), ('sep_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1457, 0.0443, 0.0414, 0.0766, 0.2608, 0.1745, 0.1433, 0.1134],
        [0.2864, 0.0319, 0.0316, 0.0494, 0.1889, 0.1170, 0.1506, 0.1442],
        [0.2905, 0.0562, 0.0464, 0.0913, 0.1310, 0.1231, 0.1492, 0.1123],
        [0.3477, 0.0394, 0.0370, 0.0609, 0.1688, 0.1202, 0.1089, 0.1171],
        [0.4008, 0.0338, 0.0306, 0.0627, 0.1274, 0.1113, 0.1165, 0.1169],
        [0.3552, 0.0561, 0.0489, 0.0920, 0.1395, 0.1085, 0.1085, 0.0914],
        [0.3987, 0.0371, 0.0373, 0.0603, 0.1322, 0.1507, 0.0927, 0.0911],
        [0.4895, 0.0331, 0.0311, 0.0632, 0.1117, 0.0859, 0.0856, 0.1000],
        [0.5656, 0.0261, 0.0269, 0.0418, 0.0771, 0.0744, 0.0901, 0.0980],
        [0.5220, 0.0429, 0.0395, 0.0712, 0.0930, 0.0742, 0.0739, 0.0833],
        [0.5690, 0.0315, 0.0313, 0.0499, 0.1037, 0.0698, 0.0761, 0.0688],
        [0.6267, 0.0277, 0.0269, 0.0520, 0.0618, 0.0526, 0.0739, 0.0784],
        [0.7241, 0.0200, 0.0210, 0.0303, 0.0445, 0.0488, 0.0532, 0.0581],
        [0.7257, 0.0177, 0.0182, 0.0244, 0.0485, 0.0484, 0.0487, 0.0683]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.0906, 0.1719, 0.1470, 0.0982, 0.1360, 0.1214, 0.1322, 0.1028],
        [0.1518, 0.1195, 0.1192, 0.1248, 0.1109, 0.1244, 0.1269, 0.1225],
        [0.0924, 0.1454, 0.1442, 0.1284, 0.1025, 0.1581, 0.1339, 0.0951],
        [0.1262, 0.1313, 0.1394, 0.1315, 0.0959, 0.1231, 0.1597, 0.0931],
        [0.1456, 0.0781, 0.0829, 0.1387, 0.1333, 0.1255, 0.1313, 0.1647],
        [0.0916, 0.1479, 0.1313, 0.1450, 0.1158, 0.1357, 0.1272, 0.1055],
        [0.1209, 0.1234, 0.1308, 0.1457, 0.1117, 0.0995, 0.1235, 0.1446],
        [0.1316, 0.0780, 0.0850, 0.1475, 0.1589, 0.1350, 0.1284, 0.1356],
        [0.1523, 0.0690, 0.0842, 0.1372, 0.1174, 0.1235, 0.1537, 0.1628],
        [0.1047, 0.1334, 0.1286, 0.1249, 0.1340, 0.1360, 0.1199, 0.1185],
        [0.1176, 0.1346, 0.1433, 0.1100, 0.1281, 0.1212, 0.1264, 0.1188],
        [0.1649, 0.0696, 0.0802, 0.1685, 0.1301, 0.1194, 0.1143, 0.1530],
        [0.1635, 0.0637, 0.0780, 0.1464, 0.1525, 0.1270, 0.1519, 0.1168],
        [0.1862, 0.0609, 0.0738, 0.1322, 0.1277, 0.1348, 0.1385, 0.1459]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 47 [   0/390]  Loss: 0.1241 (0.124)  Acc@1: 96.8750 (96.8750)  Acc@5: 100.0000 (100.0000)LR: 1.213e-03
Train: 47 [  50/390]  Loss: 0.06102 (0.175)  Acc@1: 98.4375 (94.3015)  Acc@5: 100.0000 (99.9081)LR: 1.213e-03
Train: 47 [ 100/390]  Loss: 0.1632 (0.180)  Acc@1: 95.3125 (93.9202)  Acc@5: 100.0000 (99.9381)LR: 1.213e-03
Train: 47 [ 150/390]  Loss: 0.3657 (0.178)  Acc@1: 89.0625 (94.0087)  Acc@5: 100.0000 (99.9172)LR: 1.213e-03
Train: 47 [ 200/390]  Loss: 0.1828 (0.176)  Acc@1: 95.3125 (94.0065)  Acc@5: 100.0000 (99.9223)LR: 1.213e-03
Train: 47 [ 250/390]  Loss: 0.1968 (0.182)  Acc@1: 93.7500 (93.8123)  Acc@5: 100.0000 (99.9191)LR: 1.213e-03
Train: 47 [ 300/390]  Loss: 0.2117 (0.185)  Acc@1: 90.6250 (93.7604)  Acc@5: 100.0000 (99.9066)LR: 1.213e-03
Train: 47 [ 350/390]  Loss: 0.1055 (0.184)  Acc@1: 96.8750 (93.7188)  Acc@5: 100.0000 (99.9154)LR: 1.213e-03
Train: 47 [ 390/390]  Loss: 0.1466 (0.185)  Acc@1: 97.5000 (93.6880)  Acc@5: 100.0000 (99.9120)LR: 1.213e-03
train_acc 93.688000
Valid: 47 [   0/390]  Loss: 0.3339 (0.334)  Acc@1: 87.5000 (87.5000)  Acc@5: 98.4375 (98.4375)
Valid: 47 [  50/390]  Loss: 0.3656 (0.372)  Acc@1: 85.9375 (87.8983)  Acc@5: 100.0000 (99.3873)
Valid: 47 [ 100/390]  Loss: 0.5826 (0.361)  Acc@1: 82.8125 (88.1498)  Acc@5: 98.4375 (99.4585)
Valid: 47 [ 150/390]  Loss: 0.3372 (0.371)  Acc@1: 87.5000 (87.9036)  Acc@5: 98.4375 (99.4205)
Valid: 47 [ 200/390]  Loss: 0.1982 (0.376)  Acc@1: 92.1875 (87.7721)  Acc@5: 100.0000 (99.4092)
Valid: 47 [ 250/390]  Loss: 0.2326 (0.373)  Acc@1: 92.1875 (87.8113)  Acc@5: 100.0000 (99.4397)
Valid: 47 [ 300/390]  Loss: 0.2115 (0.378)  Acc@1: 95.3125 (87.6973)  Acc@5: 100.0000 (99.4342)
Valid: 47 [ 350/390]  Loss: 0.3439 (0.381)  Acc@1: 85.9375 (87.6157)  Acc@5: 100.0000 (99.4391)
Valid: 47 [ 390/390]  Loss: 0.2210 (0.379)  Acc@1: 92.5000 (87.6360)  Acc@5: 100.0000 (99.4640)
valid_acc 87.636000
epoch = 47   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('dil_conv_3x3', 1), ('dil_conv_5x5', 2), ('dil_conv_3x3', 1), ('dil_conv_5x5', 3), ('sep_conv_3x3', 2), ('skip_connect', 2), ('sep_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1468, 0.0435, 0.0411, 0.0761, 0.2608, 0.1757, 0.1438, 0.1121],
        [0.2913, 0.0315, 0.0313, 0.0487, 0.1874, 0.1162, 0.1493, 0.1443],
        [0.2955, 0.0551, 0.0458, 0.0900, 0.1327, 0.1209, 0.1489, 0.1111],
        [0.3568, 0.0389, 0.0365, 0.0599, 0.1663, 0.1184, 0.1076, 0.1155],
        [0.4095, 0.0333, 0.0304, 0.0617, 0.1269, 0.1093, 0.1141, 0.1148],
        [0.3634, 0.0551, 0.0483, 0.0911, 0.1394, 0.1065, 0.1066, 0.0897],
        [0.4085, 0.0365, 0.0367, 0.0591, 0.1310, 0.1470, 0.0916, 0.0894],
        [0.5007, 0.0325, 0.0307, 0.0622, 0.1094, 0.0833, 0.0829, 0.0983],
        [0.5765, 0.0257, 0.0266, 0.0414, 0.0745, 0.0721, 0.0865, 0.0966],
        [0.5332, 0.0416, 0.0387, 0.0696, 0.0905, 0.0726, 0.0720, 0.0818],
        [0.5819, 0.0308, 0.0305, 0.0485, 0.0997, 0.0682, 0.0736, 0.0667],
        [0.6369, 0.0271, 0.0266, 0.0508, 0.0598, 0.0509, 0.0715, 0.0764],
        [0.7326, 0.0196, 0.0206, 0.0298, 0.0427, 0.0471, 0.0515, 0.0561],
        [0.7367, 0.0174, 0.0180, 0.0241, 0.0464, 0.0463, 0.0464, 0.0647]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.0912, 0.1715, 0.1470, 0.0975, 0.1355, 0.1221, 0.1327, 0.1024],
        [0.1515, 0.1181, 0.1185, 0.1261, 0.1108, 0.1250, 0.1270, 0.1230],
        [0.0926, 0.1440, 0.1437, 0.1292, 0.1027, 0.1590, 0.1338, 0.0949],
        [0.1261, 0.1307, 0.1398, 0.1324, 0.0953, 0.1228, 0.1604, 0.0924],
        [0.1450, 0.0772, 0.0826, 0.1377, 0.1342, 0.1261, 0.1311, 0.1662],
        [0.0923, 0.1465, 0.1307, 0.1455, 0.1166, 0.1361, 0.1273, 0.1050],
        [0.1204, 0.1214, 0.1298, 0.1470, 0.1120, 0.1002, 0.1234, 0.1458],
        [0.1309, 0.0770, 0.0845, 0.1461, 0.1589, 0.1361, 0.1308, 0.1357],
        [0.1509, 0.0682, 0.0835, 0.1358, 0.1182, 0.1244, 0.1551, 0.1639],
        [0.1060, 0.1317, 0.1276, 0.1244, 0.1339, 0.1372, 0.1194, 0.1197],
        [0.1185, 0.1332, 0.1435, 0.1093, 0.1284, 0.1210, 0.1271, 0.1192],
        [0.1651, 0.0685, 0.0795, 0.1677, 0.1301, 0.1204, 0.1148, 0.1541],
        [0.1646, 0.0634, 0.0779, 0.1469, 0.1540, 0.1272, 0.1498, 0.1162],
        [0.1846, 0.0607, 0.0737, 0.1321, 0.1280, 0.1356, 0.1384, 0.1470]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 48 [   0/390]  Loss: 0.09022 (0.0902)  Acc@1: 98.4375 (98.4375)  Acc@5: 100.0000 (100.0000)LR: 1.095e-03
Train: 48 [  50/390]  Loss: 0.09618 (0.195)  Acc@1: 95.3125 (93.6887)  Acc@5: 100.0000 (99.8775)LR: 1.095e-03
Train: 48 [ 100/390]  Loss: 0.1948 (0.186)  Acc@1: 92.1875 (93.7655)  Acc@5: 98.4375 (99.8762)LR: 1.095e-03
Train: 48 [ 150/390]  Loss: 0.06412 (0.188)  Acc@1: 100.0000 (93.6362)  Acc@5: 100.0000 (99.9172)LR: 1.095e-03
Train: 48 [ 200/390]  Loss: 0.3739 (0.188)  Acc@1: 85.9375 (93.7034)  Acc@5: 100.0000 (99.9145)LR: 1.095e-03
Train: 48 [ 250/390]  Loss: 0.3114 (0.186)  Acc@1: 85.9375 (93.7375)  Acc@5: 100.0000 (99.9128)LR: 1.095e-03
Train: 48 [ 300/390]  Loss: 0.1914 (0.186)  Acc@1: 92.1875 (93.6825)  Acc@5: 100.0000 (99.9221)LR: 1.095e-03
Train: 48 [ 350/390]  Loss: 0.3033 (0.187)  Acc@1: 87.5000 (93.6610)  Acc@5: 100.0000 (99.9288)LR: 1.095e-03
Train: 48 [ 390/390]  Loss: 0.1536 (0.187)  Acc@1: 97.5000 (93.6680)  Acc@5: 100.0000 (99.9280)LR: 1.095e-03
train_acc 93.668000
Valid: 48 [   0/390]  Loss: 0.2840 (0.284)  Acc@1: 89.0625 (89.0625)  Acc@5: 100.0000 (100.0000)
Valid: 48 [  50/390]  Loss: 0.4139 (0.375)  Acc@1: 85.9375 (87.1017)  Acc@5: 100.0000 (99.4792)
Valid: 48 [ 100/390]  Loss: 0.2891 (0.356)  Acc@1: 90.6250 (88.1033)  Acc@5: 100.0000 (99.4121)
Valid: 48 [ 150/390]  Loss: 0.4543 (0.363)  Acc@1: 82.8125 (87.7483)  Acc@5: 100.0000 (99.4619)
Valid: 48 [ 200/390]  Loss: 0.5330 (0.366)  Acc@1: 78.1250 (87.7799)  Acc@5: 96.8750 (99.4248)
Valid: 48 [ 250/390]  Loss: 0.2468 (0.364)  Acc@1: 95.3125 (87.8611)  Acc@5: 100.0000 (99.4397)
Valid: 48 [ 300/390]  Loss: 0.2857 (0.367)  Acc@1: 92.1875 (87.7440)  Acc@5: 100.0000 (99.4238)
Valid: 48 [ 350/390]  Loss: 0.2498 (0.369)  Acc@1: 93.7500 (87.8250)  Acc@5: 100.0000 (99.4213)
Valid: 48 [ 390/390]  Loss: 0.1899 (0.369)  Acc@1: 95.0000 (87.8680)  Acc@5: 100.0000 (99.4320)
valid_acc 87.868000
epoch = 48   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('dil_conv_3x3', 1), ('dil_conv_5x5', 2), ('sep_conv_5x5', 0), ('dil_conv_5x5', 3), ('sep_conv_3x3', 2), ('skip_connect', 2), ('sep_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1495, 0.0426, 0.0407, 0.0752, 0.2624, 0.1759, 0.1428, 0.1108],
        [0.2941, 0.0311, 0.0310, 0.0481, 0.1885, 0.1149, 0.1488, 0.1436],
        [0.3016, 0.0539, 0.0454, 0.0890, 0.1332, 0.1203, 0.1472, 0.1094],
        [0.3640, 0.0385, 0.0363, 0.0593, 0.1653, 0.1174, 0.1049, 0.1143],
        [0.4164, 0.0330, 0.0302, 0.0610, 0.1262, 0.1087, 0.1116, 0.1129],
        [0.3712, 0.0540, 0.0475, 0.0896, 0.1404, 0.1046, 0.1050, 0.0877],
        [0.4149, 0.0363, 0.0363, 0.0585, 0.1307, 0.1452, 0.0902, 0.0879],
        [0.5110, 0.0321, 0.0303, 0.0609, 0.1076, 0.0816, 0.0806, 0.0959],
        [0.5863, 0.0256, 0.0265, 0.0410, 0.0719, 0.0708, 0.0840, 0.0940],
        [0.5451, 0.0403, 0.0378, 0.0677, 0.0881, 0.0712, 0.0698, 0.0801],
        [0.5926, 0.0304, 0.0302, 0.0476, 0.0964, 0.0669, 0.0712, 0.0647],
        [0.6466, 0.0268, 0.0263, 0.0495, 0.0578, 0.0495, 0.0696, 0.0739],
        [0.7394, 0.0196, 0.0206, 0.0296, 0.0412, 0.0458, 0.0497, 0.0541],
        [0.7448, 0.0172, 0.0178, 0.0240, 0.0451, 0.0447, 0.0447, 0.0617]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.0919, 0.1716, 0.1475, 0.0963, 0.1347, 0.1229, 0.1327, 0.1024],
        [0.1511, 0.1179, 0.1184, 0.1255, 0.1120, 0.1258, 0.1273, 0.1219],
        [0.0937, 0.1425, 0.1427, 0.1290, 0.1027, 0.1606, 0.1341, 0.0947],
        [0.1260, 0.1297, 0.1390, 0.1339, 0.0953, 0.1231, 0.1601, 0.0929],
        [0.1450, 0.0763, 0.0818, 0.1364, 0.1356, 0.1266, 0.1312, 0.1671],
        [0.0931, 0.1451, 0.1298, 0.1456, 0.1178, 0.1362, 0.1273, 0.1051],
        [0.1199, 0.1199, 0.1286, 0.1477, 0.1123, 0.1014, 0.1225, 0.1477],
        [0.1310, 0.0763, 0.0837, 0.1448, 0.1598, 0.1366, 0.1314, 0.1364],
        [0.1489, 0.0676, 0.0828, 0.1342, 0.1184, 0.1257, 0.1567, 0.1658],
        [0.1067, 0.1301, 0.1264, 0.1248, 0.1334, 0.1382, 0.1189, 0.1216],
        [0.1184, 0.1317, 0.1423, 0.1098, 0.1289, 0.1214, 0.1276, 0.1199],
        [0.1650, 0.0677, 0.0786, 0.1662, 0.1313, 0.1212, 0.1143, 0.1556],
        [0.1647, 0.0630, 0.0776, 0.1463, 0.1553, 0.1276, 0.1493, 0.1162],
        [0.1842, 0.0604, 0.0739, 0.1320, 0.1276, 0.1351, 0.1385, 0.1483]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 49 [   0/390]  Loss: 0.3622 (0.362)  Acc@1: 85.9375 (85.9375)  Acc@5: 100.0000 (100.0000)LR: 1.024e-03
Train: 49 [  50/390]  Loss: 0.09843 (0.176)  Acc@1: 95.3125 (93.7500)  Acc@5: 100.0000 (99.9694)LR: 1.024e-03
Train: 49 [ 100/390]  Loss: 0.1694 (0.186)  Acc@1: 93.7500 (93.6108)  Acc@5: 100.0000 (99.9072)LR: 1.024e-03
Train: 49 [ 150/390]  Loss: 0.1239 (0.183)  Acc@1: 96.8750 (93.5120)  Acc@5: 100.0000 (99.9276)LR: 1.024e-03
Train: 49 [ 200/390]  Loss: 0.2166 (0.187)  Acc@1: 92.1875 (93.3613)  Acc@5: 100.0000 (99.9145)LR: 1.024e-03
Train: 49 [ 250/390]  Loss: 0.2635 (0.191)  Acc@1: 92.1875 (93.2956)  Acc@5: 100.0000 (99.9128)LR: 1.024e-03
Train: 49 [ 300/390]  Loss: 0.1614 (0.195)  Acc@1: 95.3125 (93.1790)  Acc@5: 100.0000 (99.9118)LR: 1.024e-03
Train: 49 [ 350/390]  Loss: 0.1885 (0.194)  Acc@1: 90.6250 (93.1847)  Acc@5: 100.0000 (99.9243)LR: 1.024e-03
Train: 49 [ 390/390]  Loss: 0.1316 (0.195)  Acc@1: 95.0000 (93.1480)  Acc@5: 100.0000 (99.9080)LR: 1.024e-03
train_acc 93.148000
Valid: 49 [   0/390]  Loss: 0.2994 (0.299)  Acc@1: 90.6250 (90.6250)  Acc@5: 100.0000 (100.0000)
Valid: 49 [  50/390]  Loss: 0.4981 (0.402)  Acc@1: 84.3750 (86.6422)  Acc@5: 98.4375 (99.2647)
Valid: 49 [ 100/390]  Loss: 0.4916 (0.383)  Acc@1: 82.8125 (87.1442)  Acc@5: 100.0000 (99.3657)
Valid: 49 [ 150/390]  Loss: 0.4233 (0.391)  Acc@1: 87.5000 (86.9826)  Acc@5: 100.0000 (99.3274)
Valid: 49 [ 200/390]  Loss: 0.3482 (0.399)  Acc@1: 92.1875 (86.8548)  Acc@5: 100.0000 (99.3315)
Valid: 49 [ 250/390]  Loss: 0.3735 (0.398)  Acc@1: 87.5000 (86.8526)  Acc@5: 100.0000 (99.3713)
Valid: 49 [ 300/390]  Loss: 0.1187 (0.398)  Acc@1: 96.8750 (86.7421)  Acc@5: 100.0000 (99.3667)
Valid: 49 [ 350/390]  Loss: 0.2085 (0.397)  Acc@1: 92.1875 (86.7566)  Acc@5: 98.4375 (99.3634)
Valid: 49 [ 390/390]  Loss: 0.6093 (0.392)  Acc@1: 80.0000 (86.8600)  Acc@5: 100.0000 (99.3560)
valid_acc 86.860000
epoch = 49   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('dil_conv_3x3', 1), ('dil_conv_5x5', 2), ('dil_conv_3x3', 1), ('dil_conv_5x5', 3), ('sep_conv_3x3', 2), ('skip_connect', 2), ('sep_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1513, 0.0417, 0.0400, 0.0742, 0.2634, 0.1757, 0.1430, 0.1106],
        [0.2993, 0.0304, 0.0305, 0.0473, 0.1895, 0.1131, 0.1469, 0.1429],
        [0.3091, 0.0530, 0.0447, 0.0879, 0.1337, 0.1183, 0.1452, 0.1081],
        [0.3734, 0.0377, 0.0357, 0.0584, 0.1629, 0.1162, 0.1020, 0.1136],
        [0.4244, 0.0324, 0.0298, 0.0602, 0.1245, 0.1068, 0.1093, 0.1125],
        [0.3810, 0.0531, 0.0467, 0.0883, 0.1391, 0.1035, 0.1022, 0.0861],
        [0.4239, 0.0357, 0.0357, 0.0574, 0.1298, 0.1426, 0.0891, 0.0858],
        [0.5201, 0.0315, 0.0297, 0.0596, 0.1063, 0.0796, 0.0793, 0.0940],
        [0.5989, 0.0252, 0.0259, 0.0402, 0.0691, 0.0684, 0.0812, 0.0911],
        [0.5568, 0.0394, 0.0371, 0.0663, 0.0854, 0.0703, 0.0675, 0.0774],
        [0.6064, 0.0295, 0.0294, 0.0462, 0.0924, 0.0651, 0.0688, 0.0623],
        [0.6567, 0.0263, 0.0258, 0.0484, 0.0560, 0.0479, 0.0674, 0.0715],
        [0.7471, 0.0194, 0.0203, 0.0291, 0.0398, 0.0448, 0.0479, 0.0517],
        [0.7540, 0.0170, 0.0176, 0.0238, 0.0433, 0.0430, 0.0427, 0.0587]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.0915, 0.1718, 0.1481, 0.0957, 0.1354, 0.1221, 0.1332, 0.1023],
        [0.1516, 0.1167, 0.1177, 0.1257, 0.1126, 0.1268, 0.1269, 0.1220],
        [0.0933, 0.1426, 0.1434, 0.1294, 0.1018, 0.1609, 0.1346, 0.0940],
        [0.1268, 0.1289, 0.1389, 0.1336, 0.0950, 0.1234, 0.1609, 0.0925],
        [0.1442, 0.0757, 0.0820, 0.1368, 0.1351, 0.1280, 0.1306, 0.1676],
        [0.0936, 0.1453, 0.1300, 0.1449, 0.1181, 0.1361, 0.1268, 0.1051],
        [0.1193, 0.1183, 0.1278, 0.1484, 0.1129, 0.1019, 0.1219, 0.1496],
        [0.1307, 0.0757, 0.0838, 0.1451, 0.1585, 0.1371, 0.1323, 0.1368],
        [0.1488, 0.0668, 0.0828, 0.1343, 0.1174, 0.1253, 0.1578, 0.1669],
        [0.1063, 0.1305, 0.1272, 0.1243, 0.1338, 0.1373, 0.1183, 0.1222],
        [0.1186, 0.1305, 0.1418, 0.1094, 0.1293, 0.1213, 0.1287, 0.1203],
        [0.1645, 0.0674, 0.0789, 0.1671, 0.1314, 0.1209, 0.1136, 0.1562],
        [0.1653, 0.0627, 0.0783, 0.1477, 0.1561, 0.1271, 0.1472, 0.1155],
        [0.1851, 0.0600, 0.0742, 0.1325, 0.1284, 0.1340, 0.1375, 0.1483]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
